From 6c1b443f17a7ebb44d8fba3534f10bd3d599b867 Mon Sep 17 00:00:00 2001
From: ahbarnett <alex.h.barnett@gmail.com>
Date: Fri, 4 Aug 2023 14:49:39 -0400
Subject: [PATCH 01/14] removed obsolete srcinfo.dipoles from stokes matlab
 example

---
 matlab/stfmm3d_example.m | 1 -
 vec-kernels/Makefile     | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/matlab/stfmm3d_example.m b/matlab/stfmm3d_example.m
index 438551b3..1785560f 100644
--- a/matlab/stfmm3d_example.m
+++ b/matlab/stfmm3d_example.m
@@ -47,7 +47,6 @@
 srcinfo.stoklet = rand(nd,3,ns);
 srcinfo.strslet = rand(nd,3,ns);
 srcinfo.strsvec = rand(nd,3,ns);
-srcinfo.dipoles = rand(nd,3,ns);
 
 targ = rand(3,nt);
 eps = 1e-5;
diff --git a/vec-kernels/Makefile b/vec-kernels/Makefile
index d8b8c179..16ba8b44 100644
--- a/vec-kernels/Makefile
+++ b/vec-kernels/Makefile
@@ -1,11 +1,11 @@
 AR = ar cru
 
-FC = gfortran
+FC = gfortran-9
 #FC = ifort
 FFLAGS = -Ofast -fopenmp -march=native -Wall -unroll-aggressive
 #FFLAGS = -Ofast -unroll-aggressive -xCORE-AVX512 -qopt-report:5 -qopt-zmm-usage=high -fopenmp
 
-CXX = g++
+CXX = g++-9
 #CXX = icpc
 CXXFLAGS = -Ofast -fopenmp -march=native -Wall -std=c++11 -unroll-aggressive # need C++11
 #CXXFLAGS = -Ofast -unroll-aggressive -xCORE-AVX512 -qopt-report:5 -qopt-zmm-usage=high -fopenmp -Wall -std=c++11# need C++11

From ec111263429c5fed223190dc1fefefd0437e5a5b Mon Sep 17 00:00:00 2001
From: ahbarnett <alex.h.barnett@gmail.com>
Date: Fri, 4 Aug 2023 16:01:59 -0400
Subject: [PATCH 02/14] stfmm3d_simpletest.m

---
 matlab/stfmm3d_simpletest.m | 47 +++++++++++++++++++++++++++++++++++++
 src/Stokes/stfmm3d.f        |  6 ++---
 2 files changed, 50 insertions(+), 3 deletions(-)
 create mode 100644 matlab/stfmm3d_simpletest.m

diff --git a/matlab/stfmm3d_simpletest.m b/matlab/stfmm3d_simpletest.m
new file mode 100644
index 00000000..6702cd5d
--- /dev/null
+++ b/matlab/stfmm3d_simpletest.m
@@ -0,0 +1,47 @@
+disp("Perf and math test matlab driver for FMM3D/Stokes")
+disp("Source to target, stokeslet+stresslet (1-vec), vel pot only...");
+% Barnett 8/4/23
+
+clear
+ns = 10000;
+nt = 10000;
+eps = 1e-3;
+ifstrs = 1;   % include stresslets?
+ifppreg = 0; % no source eval
+ifppregtarg = 1;  % just vel out
+
+rng(0)
+srcinfo.sources = rand(3,ns);
+srcinfo.stoklet = rand(3,ns);   % stokeslet strength vectors
+if ifstrs
+  srcinfo.strslet = rand(3,ns);   % optional stresslet strengths and n-vecs
+  srcinfo.strsvec = rand(3,ns);
+end
+targ = rand(3,nt);
+
+tic;
+U = stfmm3d(eps,srcinfo,ifppreg,targ,ifppregtarg);
+t = toc;
+fprintf("%d to %d points, eps=%.3g, done in %.3g s (%.3g tot pts/sec)\n",ns,nt,eps,t,(ns+nt)/t)
+u = U.pottarg;
+i = randi(nt);   % which targ
+fprintf("velpot targ(i=%d) =\n",i);
+disp(u(:,i))
+
+% check vs direct formula
+ui = zeros(3,1);
+for j=1:ns
+  R = srcinfo.sources(:,j) - targ(:,i);  % vec
+  r = sqrt(sum(R.^2));                   % dist
+  f = srcinfo.stoklet(:,j);   % this Sto strength
+  ui = ui + (1/r)*f + (1/r^3)*R*dot(f,R);
+  if ifstrs
+    mu = srcinfo.strslet(:,j);   % this stresslet strength
+    nu = srcinfo.strsvec(:,j);   % this stresslet source vector (normal)
+    ui = ui + (6/r^5)*R*dot(mu,R)*dot(nu,R);     % apply T_{ijk}
+  end
+end
+ui = 0.5 * ui;   % FMM3D is 1/4pi off from true 1/8pi prefactor
+fprintf("rel err vs direct at ith targ: %.3g\n",norm(u(:,i)-ui)/norm(ui))
+
+
diff --git a/src/Stokes/stfmm3d.f b/src/Stokes/stfmm3d.f
index c388ef05..ff31c76c 100644
--- a/src/Stokes/stfmm3d.f
+++ b/src/Stokes/stfmm3d.f
@@ -51,11 +51,11 @@ subroutine stfmm3d(nd, eps,
 c     (note that each of these is a 3 vector per source point y^{(m)}).
 c     For x a source point, the self-interaction in the sum is omitted. 
 c
-c     Optionally, the associated pressure p(x) and gradient grad u(x)
-c     are returned
+c     Optionally, the associated pressure p(x) and 3x3 gradient tensor
+c     grad u(x) are returned
 c
 c       p(x) = sum_m P_j(x,y^m) sigma^{(m)}_j
-c          + sum_m T_{ijk}(x,y^{(m)}) PI_{jk} mu^{(m)}_j nu^{(m)}_k
+c          + sum_m PI_{jk}(x,y{(m)}) mu^{(m)}_j nu^{(m)}_k
 c
 c       grad u(x) = grad[sum_m G_{ij}(x,y^m) sigma^{(m)}_j
 c                + sum_m T_{ijk}(x,y^{(m)}) mu^{(m)}_j nu^{(m)}_k]

From b426f439b97f874ec629a6e4f059ebfc8f2ba5ba Mon Sep 17 00:00:00 2001
From: ahbarnett <alex.h.barnett@gmail.com>
Date: Fri, 4 Aug 2023 16:57:47 -0400
Subject: [PATCH 03/14] clarify stfmm3d.f docs and fix typos in kernel defs

---
 matlab/stfmm3d_simpletest.m | 18 +++++++++---------
 src/Stokes/stfmm3d.f        | 36 +++++++++++++++++++++++-------------
 2 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/matlab/stfmm3d_simpletest.m b/matlab/stfmm3d_simpletest.m
index 6702cd5d..d33008c2 100644
--- a/matlab/stfmm3d_simpletest.m
+++ b/matlab/stfmm3d_simpletest.m
@@ -3,11 +3,11 @@
 % Barnett 8/4/23
 
 clear
-ns = 10000;
-nt = 10000;
+ns = 100000;
+nt = 100000;
 eps = 1e-3;
-ifstrs = 1;   % include stresslets?
-ifppreg = 0; % no source eval
+ifstrs = 1;       % include stresslets?
+ifppreg = 0;      % no eval at sources
 ifppregtarg = 1;  % just vel out
 
 rng(0)
@@ -17,7 +17,7 @@
   srcinfo.strslet = rand(3,ns);   % optional stresslet strengths and n-vecs
   srcinfo.strsvec = rand(3,ns);
 end
-targ = rand(3,nt);
+targ = rand(3,nt);         % targs and srcs in same unit cube
 
 tic;
 U = stfmm3d(eps,srcinfo,ifppreg,targ,ifppregtarg);
@@ -28,17 +28,17 @@
 fprintf("velpot targ(i=%d) =\n",i);
 disp(u(:,i))
 
-% check vs direct formula
+% check u (velocity potential) vs direct formula
 ui = zeros(3,1);
 for j=1:ns
-  R = srcinfo.sources(:,j) - targ(:,i);  % vec
+  R = targ(:,i) - srcinfo.sources(:,j);  % x-y with std sign (targ-src).
   r = sqrt(sum(R.^2));                   % dist
-  f = srcinfo.stoklet(:,j);   % this Sto strength
+  f = srcinfo.stoklet(:,j);              % strength
   ui = ui + (1/r)*f + (1/r^3)*R*dot(f,R);
   if ifstrs
     mu = srcinfo.strslet(:,j);   % this stresslet strength
     nu = srcinfo.strsvec(:,j);   % this stresslet source vector (normal)
-    ui = ui + (6/r^5)*R*dot(mu,R)*dot(nu,R);     % apply T_{ijk}
+    ui = ui - (6/r^5)*R*dot(mu,R)*dot(nu,R);  % apply T_{ijk}, sign matches
   end
 end
 ui = 0.5 * ui;   % FMM3D is 1/4pi off from true 1/8pi prefactor
diff --git a/src/Stokes/stfmm3d.f b/src/Stokes/stfmm3d.f
index ff31c76c..b8e9ea76 100644
--- a/src/Stokes/stfmm3d.f
+++ b/src/Stokes/stfmm3d.f
@@ -3,23 +3,28 @@
 c
 c**********************************************************************
 c      
-c     We take the following conventions for the Stokes kernels
+c     We take the following conventions for the Stokes kernels.
 c
-c     For a source y and target x, let r_i = x_i-y_i
+c     1) The dynamic viscosity (mu) is assumed to be 1.
+c     2) All kernels are a factor 4pi larger than standard definitions
+c        (this is for historical reasons).
+c     Thus, in general, divide the velocity (potential or grad) outputs
+c     by 4pi.mu, and pressure by 4pi, to recover standard definitions.
+c
+c     For a source y and target x, let r_i = x_i-y_i     (note sign)
 c     and let r = sqrt(r_1^2 + r_2^2 + r_3^2)
 c
 c     The Stokeslet, G_{ij}, and its associated pressure tensor, P_j,
-c     (without the 1/4pi scaling) are
+c     we define as
 c
-c     G_{ij}(x,y) = (r_i r_j)/(2r^3) + delta_{ij}/(2r)
+c     G_{ij}(x,y) = ( delta_{ij}/r  + r_i r_j / r^3 )/2
 c     P_j(x,y) = r_j/r^3
 c
 c     The (Type I) stresslet, T_{ijk}, and its associated pressure
-c     tensor, PI_{jk}, (without the 1/4pi scaling) are
+c     tensor, PI_{jk}, we define as
 c     
 c     T_{ijk}(x,y) = -3 r_i r_j r_k/ r^5
-c     PI_{jk} = -2 delta_{jk} + 6 r_j r_k/r^5      
-c
+c     PI_{jk}(x,y) = -2 delta_{jk}/r^3 + 6 r_j r_k/r^5      
       
 
       subroutine stfmm3d(nd, eps, 
@@ -41,25 +46,30 @@ subroutine stfmm3d(nd, eps,
 c     interactions (ignoring self-interactions) and
 c     interactions with targs.
 c      
-c     This routine computes sums of the form
+c     This routine computes the sum for the velocity,
 c
-c       u(x) = sum_m G_{ij}(x,y^{(m)}) sigma^{(m)}_j
+c       u_i(x) = sum_m G_{ij}(x,y^{(m)}) sigma^{(m)}_j
 c                + sum_m T_{ijk}(x,y^{(m)}) mu^{(m)}_j nu^{(m)}_k
 c
-c     where sigma^{(m)} is the Stokeslet charge, mu^{(m)} is the
-c     stresslet charge, and nu^{(m)} is the stresslet orientation
+c     for x at all of the target locations,
+c     where sigma^{(m)} is the Stokeslet force, mu^{(m)} is the
+c     stresslet strength, and nu^{(m)} is the stresslet orientation
 c     (note that each of these is a 3 vector per source point y^{(m)}).
+c     The vector output index is i=1,2,3, and repeated indices are
+c     taken as summed over 1,2,3, ie, Einstein convention.
 c     For x a source point, the self-interaction in the sum is omitted. 
 c
 c     Optionally, the associated pressure p(x) and 3x3 gradient tensor
-c     grad u(x) are returned
+c     (grad u(x))_{li} are returned
 c
 c       p(x) = sum_m P_j(x,y^m) sigma^{(m)}_j
 c          + sum_m PI_{jk}(x,y{(m)}) mu^{(m)}_j nu^{(m)}_k
 c
-c       grad u(x) = grad[sum_m G_{ij}(x,y^m) sigma^{(m)}_j
+c       grad_l u_i(x) = grad_l [sum_m G_{ij}(x,y^m) sigma^{(m)}_j
 c                + sum_m T_{ijk}(x,y^{(m)}) mu^{(m)}_j nu^{(m)}_k]
 c
+c     Note that by combining these two the stress tensor may be gotten.
+c
 c-----------------------------------------------------------------------
 c     INPUT PARAMETERS:
 c     

From 5408c55aa8c6f4dffdcf46f2336dc0bf2d6e0f0e Mon Sep 17 00:00:00 2001
From: ahbarnett <alex.h.barnett@gmail.com>
Date: Mon, 7 Aug 2023 15:16:36 -0400
Subject: [PATCH 04/14] tidy up Stokes docs in Fortran and MATLAB

---
 matlab/fmm3d.c        |  20 +++---
 matlab/fmm3d.mw       | 138 ++++++++++++++++++++----------------------
 matlab/fmm3d_legacy.c |   2 +-
 matlab/st3ddir.m      |  62 +++----------------
 matlab/stfmm3d.m      |  76 ++++++++++++++++-------
 src/Stokes/stfmm3d.f  |  14 ++---
 6 files changed, 146 insertions(+), 166 deletions(-)

diff --git a/matlab/fmm3d.c b/matlab/fmm3d.c
index f4685474..7eeacbac 100644
--- a/matlab/fmm3d.c
+++ b/matlab/fmm3d.c
@@ -2,7 +2,7 @@
 /* Automatically generated by mwrap                    */
 /* --------------------------------------------------- */
 
-/* Code generated by mwrap 1.0 */
+/* Code generated by mwrap 1.1 */
 /*
   Copyright statement for mwrap:
 
@@ -6097,7 +6097,7 @@ void mexStub21(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 1094 ----
+/* ---- fmm3d.mw: 1128 ----
  * stfmm3d(int[1] nd, double[1] eps, int[1] ns, double[3, ns] sources, int[1] ifstoklet, double[nd3, ns_stok] stoklet, int[1] ifstrslet, double[nd3, ns_strs] strslet, double[nd3, ns_strs] strsvec, int[1] ifppreg, inout double[nd3, ns_pot] pot, inout double[nd, ns_pre] pre, inout double[nd9, ns_grad] grad, int[1] nt, double[3, nt] targ, int[1] ifppregtarg, inout double[nd3, nt_pot] pottarg, inout double[nd, nt_pre] pretarg, inout double[nd9, nt_grad] gradtarg, inout int[1] ier);
  */
 static const char* stubids22_ = "stfmm3d(i int[x], i double[x], i int[x], i double[xx], i int[x], i double[xx], i int[x], i double[xx], i double[xx], i int[x], io double[xx], io double[xx], io double[xx], i int[x], i double[xx], i int[x], io double[xx], io double[xx], io double[xx], io int[x])";
@@ -6479,7 +6479,7 @@ void mexStub22(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 1233 ----
+/* ---- fmm3d.mw: 1223 ----
  * st3ddirectstokg(int[1] nd, double[3, ns] sources, double[nd3, ns_stok] stoklet, int[1] ns, double[3, nt] targ, int[1] nt, inout double[nd3, nt] pottarg, inout double[nd, nt] pretarg, inout double[nd9, nt] gradtarg, double[1] thresh);
  */
 static const char* stubids23_ = "st3ddirectstokg(i int[x], i double[xx], i double[xx], i int[x], i double[xx], i int[x], io double[xx], io double[xx], io double[xx], i double[x])";
@@ -6685,7 +6685,7 @@ void mexStub23(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 1236 ----
+/* ---- fmm3d.mw: 1226 ----
  * st3ddirectstokstrsg(int[1] nd, double[3, ns] sources, double[nd3, ns_stok] stoklet, int[1] istress, double[nd3, ns_strs] strslet, double[nd3, ns_strs] strsvec, int[1] ns, double[3, nt] targ, int[1] nt, inout double[nd3, nt] pottarg, inout double[nd, nt] pretarg, inout double[nd9, nt] gradtarg, double[1] thresh);
  */
 static const char* stubids24_ = "st3ddirectstokstrsg(i int[x], i double[xx], i double[xx], i int[x], i double[xx], i double[xx], i int[x], i double[xx], i int[x], io double[xx], io double[xx], io double[xx], i double[x])";
@@ -7046,9 +7046,9 @@ void mexFunction(int nlhs, mxArray* plhs[],
         mexPrintf("%d calls to fmm3d.mw:661\n", mexprofrecord_[19]);
         mexPrintf("%d calls to fmm3d.mw:797\n", mexprofrecord_[20]);
         mexPrintf("%d calls to fmm3d.mw:938\n", mexprofrecord_[21]);
-        mexPrintf("%d calls to fmm3d.mw:1094\n", mexprofrecord_[22]);
-        mexPrintf("%d calls to fmm3d.mw:1233\n", mexprofrecord_[23]);
-        mexPrintf("%d calls to fmm3d.mw:1236\n", mexprofrecord_[24]);
+        mexPrintf("%d calls to fmm3d.mw:1128\n", mexprofrecord_[22]);
+        mexPrintf("%d calls to fmm3d.mw:1223\n", mexprofrecord_[23]);
+        mexPrintf("%d calls to fmm3d.mw:1226\n", mexprofrecord_[24]);
     } else if (strcmp(id, "*profile log*") == 0) {
         FILE* logfp;
         if (nrhs != 2 || mxGetString(prhs[1], id, sizeof(id)) != 0)
@@ -7079,9 +7079,9 @@ void mexFunction(int nlhs, mxArray* plhs[],
         fprintf(logfp, "%d calls to fmm3d.mw:661\n", mexprofrecord_[19]);
         fprintf(logfp, "%d calls to fmm3d.mw:797\n", mexprofrecord_[20]);
         fprintf(logfp, "%d calls to fmm3d.mw:938\n", mexprofrecord_[21]);
-        fprintf(logfp, "%d calls to fmm3d.mw:1094\n", mexprofrecord_[22]);
-        fprintf(logfp, "%d calls to fmm3d.mw:1233\n", mexprofrecord_[23]);
-        fprintf(logfp, "%d calls to fmm3d.mw:1236\n", mexprofrecord_[24]);
+        fprintf(logfp, "%d calls to fmm3d.mw:1128\n", mexprofrecord_[22]);
+        fprintf(logfp, "%d calls to fmm3d.mw:1223\n", mexprofrecord_[23]);
+        fprintf(logfp, "%d calls to fmm3d.mw:1226\n", mexprofrecord_[24]);
         fclose(logfp);
     } else
         mexErrMsgTxt("Unknown identifier");
diff --git a/matlab/fmm3d.mw b/matlab/fmm3d.mw
index 4ea7c7c9..41e7c1b2 100644
--- a/matlab/fmm3d.mw
+++ b/matlab/fmm3d.mw
@@ -951,42 +951,74 @@ end
 
 % ---------------------------------------------------------------------
 @function [U] = stfmm3d(eps,srcinfo,ifppreg,targ,ifppregtarg)
+% STFMM3D - Stokes 3D FMM
 %
+% U = stfmm3d(eps,srcinfo,ifppreg,targ,ifppregtarg)
 %
-%  Stokes FMM in R^{3}: evaluate all pairwise particle
-%  interactions (ignoring self-interactions) and
-%  interactions with targs.
-%
-%  This routine computes sums of the form
+%  Stokes FMM in R^3: evaluate all pairwise particle
+%  interactions (ignoring self-interactions) and, possibly,
+%  interactions with targets, to precision eps, using the fast
+%  multipole method (linear scaling in number of points).
+%      
+%  This routine computes the sum for the velocity vector,
 %
-%  u(x) = sum_m G_{ij}(x,y^{(m)}) sigma^{(m)}_j
-%       + sum_m T_{ijk}(x,y^{(m)}) mu^{(m)}_j nu^{(m)}_k
+%       u_i(x) = sum_m G_{ij}(x,y^{(m)}) sigma^{(m)}_j
+%                + sum_m T_{ijk}(x,y^{(m)}) mu^{(m)}_j nu^{(m)}_k
 %
-%  where sigma^{(m)} is the Stokeslet charge, mu^{(m)} is the
-%  stresslet charge, and nu^{(m)} is the stresslet orientation
+%  for x at all of the target locations, and i=1,2,3,
+%  where sigma^{(m)} is the Stokeslet force, mu^{(m)} is the
+%  stresslet strength, and nu^{(m)} is the stresslet orientation
 %  (note that each of these is a 3 vector per source point y^{(m)}).
-%  For x a source point, the self-interaction in the sum is omitted.
+%  The stokeslet kernel G and stresslet kernel T are defined below.
+%  Repeated indices are taken as summed over 1,2,3, ie, Einstein
+%  convention. For x a source point, the self-interaction in the
+%  sum is omitted.
+%
+%  Optionally, the associated pressure p(x) and 3x3 gradient tensor
+%  grad u(x) are returned,
+%
+%      p(x) = sum_m P_j(x,y^m) sigma^{(m)}_j
+%          + sum_m PI_{jk}(x,y{(m)}) mu^{(m)}_j nu^{(m)}_k
+%
+%      grad_l u_i(x) = grad_l [sum_m G_{ij}(x,y^m) sigma^{(m)}_j
+%                + sum_m T_{ijk}(x,y^{(m)}) mu^{(m)}_j nu^{(m)}_k]
+%
+%  where the pressure stokeslet P and pressure stresslet PI are defined
+%  below. Note that these two may be combined to get the stress tensor.
 %
-%  Optionally, the associated pressure p(x) and gradient grad u(x)
-%  are returned
+%  We use the following as the kernel definitions, noting that:
+%     1) The dynamic viscosity (mu) is assumed to be 1.
+%     2) All kernels are a factor 4pi larger than standard definitions
+%        (this is for historical reasons).
+%     Thus, in general, divide the velocity (potential or grad) outputs
+%     by 4pi.mu, and pressure by 4pi, to recover standard definitions.
 %
-%    p(x) = sum_m P_j(x,y^m) sigma^{(m)}_j
-%         + sum_m T_{ijk}(x,y^{(m)}) PI_{jk} mu^{(m)}_j nu^{(m)}_k
+%  For a source y and target x, let r_i = x_i-y_i     (note sign)
+%  and let r = sqrt(r_1^2 + r_2^2 + r_3^2)
+%
+%  The Stokeslet, G_{ij}, and its associated pressure tensor, P_j,
+%  we define as
+%
+%     G_{ij}(x,y) = ( delta_{ij}/r  + r_i r_j / r^3 )/2
+%     P_j(x,y) = r_j/r^3
+%
+%  The (Type I) stresslet, T_{ijk}, and its associated pressure
+%  tensor, PI_{jk}, we define as
+%   
+%     T_{ijk}(x,y) = -3 r_i r_j r_k/ r^5
+%     PI_{jk}(x,y) = -2 delta_{jk}/r^3 + 6 r_j r_k/r^5      
 %
-%    grad u(x) = grad[sum_m G_{ij}(x,y^m) sigma^{(m)}_j
-%              + sum_m T_{ijk}(x,y^{(m)}) mu^{(m)}_j nu^{(m)}_k]
 % 
 %  Args:
 %
 %  -  eps: double   
 %        precision requested
 %  -  srcinfo: structure
-%        structure containing sourceinfo
-%     
+%        structure containing the following info about sources:
 %     *  srcinfo.sources: double(3,n)    
 %           source locations, $x_{j}$
 %     *  srcinfo.nd: integer
-%           number of densities (optional, 
+%           number of density vectors for the same points (optional, 
 %           default - nd = 1)
 %     *  srcinfo.stoklet: double(nd,3,n) 
 %           Stokeslet charge strengths, $sigma_{j}$ (optional, 
@@ -1002,14 +1034,14 @@ end
 %        | potential at sources evaluated if ifppreg = 1
 %        | potential and pressure at sources evaluated if ifppreg=2
 %        | potential, pressure and gradient at sources evaluated if ifppreg=3
-%
 %  -  targ: double(3,nt)
 %        target locations, $t_{i}$ (optional)
 %  -  ifppregtarg: integer
 %        | target eval flag (optional)
 %        | potential at targets evaluated if ifppregtarg = 1
 %        | potential and pressure at targets evaluated if ifppregtarg = 2 
-%        | potential, pressure and gradient at targets evaluated if ifppregtarg = 3
+%        | potential, pressure and gradient at targets evaluated if
+%        |   ifppregtarg = 3
 %  
 %  Returns:
 %  
@@ -1019,6 +1051,8 @@ end
 %  -  U.pottarg: velocity at target locations if requested
 %  -  U.pretarg: pressure at target locations if requested
 %  -  U.gradtarg: gradient of velocity at target locations if requested
+%
+% See also: st3ddir
 
   sources = srcinfo.sources;
   [m,ns] = size(sources);
@@ -1110,64 +1144,20 @@ end
 
 % ---------------------------------------------------------------------
 @function [U] = st3ddir(srcinfo,targ,ifppregtarg)
+% ST3DDIR - Stokes 3D direct slow kernel evaluation (tester for STFMM3D) 
 %
+%  U = st3ddir(srcinfo,targ,ifppregtarg)
 %
-%  Stokes FMM in R^{3}: evaluate all pairwise particle
+%  Stokes direct evaluation in R^3: evaluate all pairwise particle
 %  interactions (ignoring self-interactions) and
-%  interactions with targs.
-%
-%  This routine computes sums of the form
+%  interactions with targs. This is the slow O(N^2) direct code used
+%  as a reference for testing the (fast) code stfmm3d.
 %
-%  u(x) = sum_m G_{ij}(x,y^{(m)}) sigma^{(m)}_j
-%       + sum_m T_{ijk}(x,y^{(m)}) mu^{(m)}_j nu^{(m)}_k
-%
-%  where sigma^{(m)} is the Stokeslet charge, mu^{(m)} is the
-%  stresslet charge, and nu^{(m)} is the stresslet orientation
-%  (note that each of these is a 3 vector per source point y^{(m)}).
-%  For x a source point, the self-interaction in the sum is omitted.
+%  Kernel definitions, input and outputs arguments are identical to
+%  stfmm3d (see that function for all definitions), apart from:
+%  1) there are currently no outputs at sources (ie, as if ifppreg=0)
 %
-%  Optionally, the associated pressure p(x) and gradient grad u(x)
-%  are returned
-%
-%    p(x) = sum_m P_j(x,y^m) sigma^{(m)}_j
-%         + sum_m T_{ijk}(x,y^{(m)}) PI_{jk} mu^{(m)}_j nu^{(m)}_k
-%
-%    grad u(x) = grad[sum_m G_{ij}(x,y^m) sigma^{(m)}_j
-%              + sum_m T_{ijk}(x,y^{(m)}) mu^{(m)}_j nu^{(m)}_k]
-% 
-%  Args:
-%
-%  -  srcinfo: structure
-%        structure containing sourceinfo
-%     
-%     *  srcinfo.sources: double(3,n)    
-%           source locations, $x_{j}$
-%     *  srcinfo.nd: integer
-%           number of densities (optional, 
-%           default - nd = 1)
-%     *  srcinfo.stoklet: double(nd,3,n) 
-%           Stokeslet charge strengths, $sigma_{j}$ (optional, 
-%           default - term corresponding to Stokeslet charge strengths dropped)
-%     *  srcinfo.strslet: double(nd,3,n) 
-%           stresslet strengths, $mu_{j}$ (optional
-%           default - term corresponding to stresslet strengths dropped) 
-%     *  srcinfo.strsvec: double(nd,3,n) 
-%           stresslet orientations, $nu_{j}$ (optional
-%           default - term corresponding to stresslet orientations dropped) 
-%
-%  -  targ: double(3,nt)
-%        target locations, $t_{i}$ (optional)
-%  -  ifppregtarg: integer
-%        | target eval flag (optional)
-%        | potential at targets evaluated if ifppregtarg = 1
-%        | potential and pressure at targets evaluated if ifppregtarg = 2 
-%        | potential, pressure and gradient at targets evaluated if ifppregtarg = 3
-%  
-%  Returns:
-%  
-%  -  U.pottarg: velocity at target locations if requested
-%  -  U.pretarg: pressure at target locations if requested
-%  -  U.gradtarg: gradient of velocity at target locations if requested
+% See also: stfmm3d
 
   sources = srcinfo.sources;
   [m,ns] = size(sources);
diff --git a/matlab/fmm3d_legacy.c b/matlab/fmm3d_legacy.c
index e60f3873..ff766f57 100644
--- a/matlab/fmm3d_legacy.c
+++ b/matlab/fmm3d_legacy.c
@@ -2,7 +2,7 @@
 /* Automatically generated by mwrap                    */
 /* --------------------------------------------------- */
 
-/* Code generated by mwrap 1.0 */
+/* Code generated by mwrap 1.1 */
 /*
   Copyright statement for mwrap:
 
diff --git a/matlab/st3ddir.m b/matlab/st3ddir.m
index e2f95b79..b1edaef5 100644
--- a/matlab/st3ddir.m
+++ b/matlab/st3ddir.m
@@ -1,62 +1,18 @@
 function [U] = st3ddir(srcinfo,targ,ifppregtarg)
+% ST3DDIR - Stokes 3D direct slow kernel evaluation (tester for STFMM3D) 
 %
+%  U = st3ddir(srcinfo,targ,ifppregtarg)
 %
-%  Stokes FMM in R^{3}: evaluate all pairwise particle
+%  Stokes direct evaluation in R^3: evaluate all pairwise particle
 %  interactions (ignoring self-interactions) and
-%  interactions with targs.
+%  interactions with targs. This is the slow O(N^2) direct code used
+%  as a reference for testing the (fast) code stfmm3d.
 %
-%  This routine computes sums of the form
+%  Kernel definitions, input and outputs arguments are identical to
+%  stfmm3d (see that function for all definitions), apart from:
+%  1) there are currently no outputs at sources (ie, as if ifppreg=0)
 %
-%  u(x) = sum_m G_{ij}(x,y^{(m)}) sigma^{(m)}_j
-%       + sum_m T_{ijk}(x,y^{(m)}) mu^{(m)}_j nu^{(m)}_k
-%
-%  where sigma^{(m)} is the Stokeslet charge, mu^{(m)} is the
-%  stresslet charge, and nu^{(m)} is the stresslet orientation
-%  (note that each of these is a 3 vector per source point y^{(m)}).
-%  For x a source point, the self-interaction in the sum is omitted.
-%
-%  Optionally, the associated pressure p(x) and gradient grad u(x)
-%  are returned
-%
-%    p(x) = sum_m P_j(x,y^m) sigma^{(m)}_j
-%         + sum_m T_{ijk}(x,y^{(m)}) PI_{jk} mu^{(m)}_j nu^{(m)}_k
-%
-%    grad u(x) = grad[sum_m G_{ij}(x,y^m) sigma^{(m)}_j
-%              + sum_m T_{ijk}(x,y^{(m)}) mu^{(m)}_j nu^{(m)}_k]
-% 
-%  Args:
-%
-%  -  srcinfo: structure
-%        structure containing sourceinfo
-%     
-%     *  srcinfo.sources: double(3,n)    
-%           source locations, $x_{j}$
-%     *  srcinfo.nd: integer
-%           number of densities (optional, 
-%           default - nd = 1)
-%     *  srcinfo.stoklet: double(nd,3,n) 
-%           Stokeslet charge strengths, $sigma_{j}$ (optional, 
-%           default - term corresponding to Stokeslet charge strengths dropped)
-%     *  srcinfo.strslet: double(nd,3,n) 
-%           stresslet strengths, $mu_{j}$ (optional
-%           default - term corresponding to stresslet strengths dropped) 
-%     *  srcinfo.strsvec: double(nd,3,n) 
-%           stresslet orientations, $nu_{j}$ (optional
-%           default - term corresponding to stresslet orientations dropped) 
-%
-%  -  targ: double(3,nt)
-%        target locations, $t_{i}$ (optional)
-%  -  ifppregtarg: integer
-%        | target eval flag (optional)
-%        | potential at targets evaluated if ifppregtarg = 1
-%        | potential and pressure at targets evaluated if ifppregtarg = 2 
-%        | potential, pressure and gradient at targets evaluated if ifppregtarg = 3
-%  
-%  Returns:
-%  
-%  -  U.pottarg: velocity at target locations if requested
-%  -  U.pretarg: pressure at target locations if requested
-%  -  U.gradtarg: gradient of velocity at target locations if requested
+% See also: stfmm3d
 
   sources = srcinfo.sources;
   [m,ns] = size(sources);
diff --git a/matlab/stfmm3d.m b/matlab/stfmm3d.m
index 62c6432a..e11acdd2 100644
--- a/matlab/stfmm3d.m
+++ b/matlab/stfmm3d.m
@@ -1,40 +1,72 @@
 function [U] = stfmm3d(eps,srcinfo,ifppreg,targ,ifppregtarg)
+% STFMM3D - Stokes 3D FMM
 %
+% U = stfmm3d(eps,srcinfo,ifppreg,targ,ifppregtarg)
 %
-%  Stokes FMM in R^{3}: evaluate all pairwise particle
-%  interactions (ignoring self-interactions) and
-%  interactions with targs.
+%  Stokes FMM in R^3: evaluate all pairwise particle
+%  interactions (ignoring self-interactions) and, possibly,
+%  interactions with targets, to precision eps, using the fast
+%  multipole method (linear scaling in number of points).
+%      
+%  This routine computes the sum for the velocity vector,
 %
-%  This routine computes sums of the form
+%       u_i(x) = sum_m G_{ij}(x,y^{(m)}) sigma^{(m)}_j
+%                + sum_m T_{ijk}(x,y^{(m)}) mu^{(m)}_j nu^{(m)}_k
 %
-%  u(x) = sum_m G_{ij}(x,y^{(m)}) sigma^{(m)}_j
-%       + sum_m T_{ijk}(x,y^{(m)}) mu^{(m)}_j nu^{(m)}_k
-%
-%  where sigma^{(m)} is the Stokeslet charge, mu^{(m)} is the
-%  stresslet charge, and nu^{(m)} is the stresslet orientation
+%  for x at all of the target locations, and i=1,2,3,
+%  where sigma^{(m)} is the Stokeslet force, mu^{(m)} is the
+%  stresslet strength, and nu^{(m)} is the stresslet orientation
 %  (note that each of these is a 3 vector per source point y^{(m)}).
-%  For x a source point, the self-interaction in the sum is omitted.
+%  The stokeslet kernel G and stresslet kernel T are defined below.
+%  Repeated indices are taken as summed over 1,2,3, ie, Einstein
+%  convention. For x a source point, the self-interaction in the
+%  sum is omitted.
+%
+%  Optionally, the associated pressure p(x) and 3x3 gradient tensor
+%  grad u(x) are returned,
+%
+%      p(x) = sum_m P_j(x,y^m) sigma^{(m)}_j
+%          + sum_m PI_{jk}(x,y{(m)}) mu^{(m)}_j nu^{(m)}_k
+%
+%      grad_l u_i(x) = grad_l [sum_m G_{ij}(x,y^m) sigma^{(m)}_j
+%                + sum_m T_{ijk}(x,y^{(m)}) mu^{(m)}_j nu^{(m)}_k]
+%
+%  where the pressure stokeslet P and pressure stresslet PI are defined
+%  below. Note that these two may be combined to get the stress tensor.
 %
-%  Optionally, the associated pressure p(x) and gradient grad u(x)
-%  are returned
+%  We use the following as the kernel definitions, noting that:
+%     1) The dynamic viscosity (mu) is assumed to be 1.
+%     2) All kernels are a factor 4pi larger than standard definitions
+%        (this is for historical reasons).
+%     Thus, in general, divide the velocity (potential or grad) outputs
+%     by 4pi.mu, and pressure by 4pi, to recover standard definitions.
 %
-%    p(x) = sum_m P_j(x,y^m) sigma^{(m)}_j
-%         + sum_m T_{ijk}(x,y^{(m)}) PI_{jk} mu^{(m)}_j nu^{(m)}_k
+%  For a source y and target x, let r_i = x_i-y_i     (note sign)
+%  and let r = sqrt(r_1^2 + r_2^2 + r_3^2)
+%
+%  The Stokeslet, G_{ij}, and its associated pressure tensor, P_j,
+%  we define as
+%
+%     G_{ij}(x,y) = ( delta_{ij}/r  + r_i r_j / r^3 )/2
+%     P_j(x,y) = r_j/r^3
+%
+%  The (Type I) stresslet, T_{ijk}, and its associated pressure
+%  tensor, PI_{jk}, we define as
+%   
+%     T_{ijk}(x,y) = -3 r_i r_j r_k/ r^5
+%     PI_{jk}(x,y) = -2 delta_{jk}/r^3 + 6 r_j r_k/r^5      
 %
-%    grad u(x) = grad[sum_m G_{ij}(x,y^m) sigma^{(m)}_j
-%              + sum_m T_{ijk}(x,y^{(m)}) mu^{(m)}_j nu^{(m)}_k]
 % 
 %  Args:
 %
 %  -  eps: double   
 %        precision requested
 %  -  srcinfo: structure
-%        structure containing sourceinfo
-%     
+%        structure containing the following info about sources:
 %     *  srcinfo.sources: double(3,n)    
 %           source locations, $x_{j}$
 %     *  srcinfo.nd: integer
-%           number of densities (optional, 
+%           number of density vectors for the same points (optional, 
 %           default - nd = 1)
 %     *  srcinfo.stoklet: double(nd,3,n) 
 %           Stokeslet charge strengths, $sigma_{j}$ (optional, 
@@ -50,14 +82,14 @@
 %        | potential at sources evaluated if ifppreg = 1
 %        | potential and pressure at sources evaluated if ifppreg=2
 %        | potential, pressure and gradient at sources evaluated if ifppreg=3
-%
 %  -  targ: double(3,nt)
 %        target locations, $t_{i}$ (optional)
 %  -  ifppregtarg: integer
 %        | target eval flag (optional)
 %        | potential at targets evaluated if ifppregtarg = 1
 %        | potential and pressure at targets evaluated if ifppregtarg = 2 
-%        | potential, pressure and gradient at targets evaluated if ifppregtarg = 3
+%        | potential, pressure and gradient at targets evaluated if
+%        |   ifppregtarg = 3
 %  
 %  Returns:
 %  
@@ -67,6 +99,8 @@
 %  -  U.pottarg: velocity at target locations if requested
 %  -  U.pretarg: pressure at target locations if requested
 %  -  U.gradtarg: gradient of velocity at target locations if requested
+%
+% See also: st3ddir
 
   sources = srcinfo.sources;
   [m,ns] = size(sources);
diff --git a/src/Stokes/stfmm3d.f b/src/Stokes/stfmm3d.f
index b8e9ea76..730be470 100644
--- a/src/Stokes/stfmm3d.f
+++ b/src/Stokes/stfmm3d.f
@@ -46,21 +46,21 @@ subroutine stfmm3d(nd, eps,
 c     interactions (ignoring self-interactions) and
 c     interactions with targs.
 c      
-c     This routine computes the sum for the velocity,
+c     This routine computes the sum for the velocity vector,
 c
 c       u_i(x) = sum_m G_{ij}(x,y^{(m)}) sigma^{(m)}_j
 c                + sum_m T_{ijk}(x,y^{(m)}) mu^{(m)}_j nu^{(m)}_k
 c
-c     for x at all of the target locations,
+c     for x at all of the target locations, and i=1,2,3,
 c     where sigma^{(m)} is the Stokeslet force, mu^{(m)} is the
 c     stresslet strength, and nu^{(m)} is the stresslet orientation
 c     (note that each of these is a 3 vector per source point y^{(m)}).
-c     The vector output index is i=1,2,3, and repeated indices are
-c     taken as summed over 1,2,3, ie, Einstein convention.
-c     For x a source point, the self-interaction in the sum is omitted. 
+c     Repeated indices are taken as summed over 1,2,3, ie, Einstein
+c     convention. For x a source point, the self-interaction in the
+c     sum is omitted. 
 c
 c     Optionally, the associated pressure p(x) and 3x3 gradient tensor
-c     (grad u(x))_{li} are returned
+c     grad u(x) are returned
 c
 c       p(x) = sum_m P_j(x,y^m) sigma^{(m)}_j
 c          + sum_m PI_{jk}(x,y{(m)}) mu^{(m)}_j nu^{(m)}_k
@@ -68,7 +68,7 @@ subroutine stfmm3d(nd, eps,
 c       grad_l u_i(x) = grad_l [sum_m G_{ij}(x,y^m) sigma^{(m)}_j
 c                + sum_m T_{ijk}(x,y^{(m)}) mu^{(m)}_j nu^{(m)}_k]
 c
-c     Note that by combining these two the stress tensor may be gotten.
+c     Note that these two may be combined to get the stress tensor.
 c
 c-----------------------------------------------------------------------
 c     INPUT PARAMETERS:

From 6410b79874758c13cbe5c1d4434b6b9de53cbc1f Mon Sep 17 00:00:00 2001
From: ahbarnett <alex.h.barnett@gmail.com>
Date: Mon, 7 Aug 2023 15:34:05 -0400
Subject: [PATCH 05/14] make stokes and em docs in matlab more matlab-like and
 clear

---
 matlab/fmm3d.mw | 93 ++++++++++++++++++++-----------------------------
 1 file changed, 37 insertions(+), 56 deletions(-)

diff --git a/matlab/fmm3d.mw b/matlab/fmm3d.mw
index 41e7c1b2..6dad2ff6 100644
--- a/matlab/fmm3d.mw
+++ b/matlab/fmm3d.mw
@@ -668,18 +668,27 @@ end
 
 % ---------------------------------------------------------------------
 @function [U] = emfmm3d(eps,zk,srcinfo,targ,ifE,ifcurlE,ifdivE)
+% EMFMM3D.  FMM in 3D for Maxwell (electromagnetic) kernels
 %
+%  U = emfmm3d(eps,zk,srcinfo,targ,ifE,ifcurlE,ifdivE)
+%
+%  Maxwell FMM in R^3: evaluate all pairwise particle
+%  interactions (ignoring self-interactions) and
+%  interactions with targs, using the fast multipole method
+%  with precision eps.
+%
+%  This subroutine evaluates sums implied by the operator representation
+%
+%      E = curl S_{k}[h_current] + S_{k}[e_current] + grad S_{k}[e_charge]
+%
+%  by calling the vector Helmholtz FMM.
+%  With appropriate input flags, the subroutine also computes divE, curlE.
 %
-%  This subroutine computes
-%      E = curl S_{k}[h_current] + S_{k}[e_current] + grad S_{k}[e_charge]  -- (1)
-%  using the vector Helmholtz fmm.
-%  The subroutine also computes divE, curlE
-%  with appropriate flags
 %  Remark: the subroutine uses a stabilized representation
 %  for computing the divergence by using integration by parts
 %  wherever possible. If the divergence is not requested, then the
-%  helmholtz fmm is called with 3*nd densities, while if the divergence
-%  is requested, then the helmholtz fmm is calld with 4*nd densities
+%  Helmholtz FMM is called with 3*nd densities, while if the divergence
+%  is requested, then the Helmholtz FMM is called with 4*nd densities
 % 
 %  Args:
 %
@@ -712,11 +721,14 @@ end
 %        curl E is returned at the target locations if ifcurlE = 1
 %  -  ifdivE: integer
 %        div E is returned at the target locations if ifdivE = 1
+%
 %  Returns:
 %  
-%  -  U.E: E field defined in (1) above at target locations if requested
+%  -  U.E:     E field defined in above equation at targets if requested
 %  -  U.curlE: curl of E field at target locations if requested
-%  -  U.divE: divergence of E at target locations if requested
+%  -  U.divE:  divergence of E at target locations if requested
+%
+% See also: HFMM3D, EM3DDIR
 
   if(nargin<5)
     return;
@@ -809,53 +821,20 @@ end
 
 % ---------------------------------------------------------------------
 @function [U] = em3ddir(zk,srcinfo,targ,ifE,ifcurlE,ifdivE)
+% EM3DDIR.  Slow direct Maxwell kernel evaluations (reference for EMFMM3D)
 %
+% U = em3ddir(zk,srcinfo,targ,ifE,ifcurlE,ifdivE)
 %
-%  This subroutine computes
-%      E = curl S_{k}[h_current] + S_{k}[e_current] + grad S_{k}[e_charge]  -- (1)
-%  using the vector Helmholtz fmm.
-%  The subroutine also computes divE, curlE
-%  with appropriate flags
-%  Remark: the subroutine uses a stabilized representation
-%  for computing the divergence by using integration by parts
-%  wherever possible. If the divergence is not requested, then the
-%  helmholtz fmm is called with 3*nd densities, while if the divergence
-%  is requested, then the helmholtz fmm is calld with 4*nd densities
-% 
-%  Args:
+%  Maxwell direct evaluation in R^3: evaluate all pairwise particle
+%  interactions (ignoring self-interactions) and
+%  interactions with targs. This is the slow O(N^2) direct code used
+%  as a reference for testing the fast code emfmm3d.
 %
-%  -  zk: complex
-%        Helmholtz parameter, k
-%  -  srcinfo: structure
-%        structure containing sourceinfo
-%     
-%     *  srcinfo.sources: double(3,n)    
-%           source locations, $x_{j}$
-%     *  srcinfo.nd: integer
-%           number of charge/dipole vectors (optional, 
-%           default - nd = 1)
-%     *  srcinfo.h_current: complex(nd,3,n) 
-%           a vector source (optional,
-%           default - term corresponding to h_current dropped) 
-%     *  srcinfo.e_current: complex(nd,3,n) 
-%           b vector source (optional,
-%           default - term corresponding to e_current dropped) 
-%     *  srcinfo.e_charge: complex(nd,n) 
-%           e_charge source (optional, 
-%           default - term corresponding to e_charge dropped)
-%  -  targ: double(3,nt)
-%        target locations, $t_{i}$
-%  -  ifE: integer
-%        E is returned at the target locations if ifE = 1
-%  -  ifcurlE: integer
-%        curl E is returned at the target locations if ifcurlE = 1
-%  -  ifdivE: integer
-%        div E is returned at the target locations if ifdivE = 1
-%  Returns:
-%  
-%  -  U.E: E field defined in (1) above at target locations if requested
-%  -  U.curlE: curl of E field at target locations if requested
-%  -  U.divE: divergence of E at target locations if requested
+%  Kernel definitions, input and outputs arguments are identical to
+%  emfmm3d (see that function for all definitions), except that the first
+%  argument (eps) is absent.
+%
+%  See also: emfmm3d
 
   if(nargin<4)
     return;
@@ -951,7 +930,7 @@ end
 
 % ---------------------------------------------------------------------
 @function [U] = stfmm3d(eps,srcinfo,ifppreg,targ,ifppregtarg)
-% STFMM3D - Stokes 3D FMM
+% STFMM3D.  FMM in 3D for Stokes (viscous fluid hydrodynamic) kernels
 %
 % U = stfmm3d(eps,srcinfo,ifppreg,targ,ifppregtarg)
 %
@@ -1144,7 +1123,7 @@ end
 
 % ---------------------------------------------------------------------
 @function [U] = st3ddir(srcinfo,targ,ifppregtarg)
-% ST3DDIR - Stokes 3D direct slow kernel evaluation (tester for STFMM3D) 
+% ST3DDIR.  Stokes 3D direct slow kernel evaluation (reference for STFMM3D)
 %
 %  U = st3ddir(srcinfo,targ,ifppregtarg)
 %
@@ -1155,7 +1134,9 @@ end
 %
 %  Kernel definitions, input and outputs arguments are identical to
 %  stfmm3d (see that function for all definitions), apart from:
-%  1) there are currently no outputs at sources (ie, as if ifppreg=0)
+%  1) the first argument (eps) is absent.
+%  2) there are currently no outputs at sources (ie, as if ifppreg=0),
+%     meaning that U.pot, U.pre, and U.grad are missing.
 %
 % See also: stfmm3d
 

From 440ea3886a15d19b7dfe0c39c3927fe98282f03b Mon Sep 17 00:00:00 2001
From: ahbarnett <alex.h.barnett@gmail.com>
Date: Mon, 7 Aug 2023 16:08:00 -0400
Subject: [PATCH 06/14] lap and helm matlab docs more matlab-like, tidy up

---
 matlab/fmm3d.mw | 115 +++++++++++++++++++++++++++---------------------
 1 file changed, 64 insertions(+), 51 deletions(-)

diff --git a/matlab/fmm3d.mw b/matlab/fmm3d.mw
index 6dad2ff6..d6a21ab9 100644
--- a/matlab/fmm3d.mw
+++ b/matlab/fmm3d.mw
@@ -1,10 +1,17 @@
 % ---------------------------------------------------------------------
 @function [U,varargout] = hfmm3d(eps,zk,srcinfo,pg,varargin)
+% HFMM3D.  FMM in 3D for Helmholtz (acoustic frequency-domain) kernel
 %
+% [U,varargout] = hfmm3d(eps,zk,srcinfo,pg,varargin)
+%
+%  Helmholtz FMM in R^3: evaluate all pairwise particle
+%  interactions (ignoring self-interactions) and possibly
+%  interactions with targets, using the fast multipole method
+%  with precision eps.
 %
 %  This subroutine computes the N-body Helmholtz
-%  interactions and its gradients in three dimensions where 
-%  the interaction kernel is given by $e^{ikr}/r$
+%  interactions and its gradients in three dimensions, where 
+%  the interaction kernel is given by $e^{ikr}/r$,
 % 
 %    u(x) = \sum_{j=1}^{N} c_{j} \frac{e^{ik\|x-x_{j}\|}}{\|x-x_{j}\|} - 
 %      v_{j} \cdot \nabla \left( \frac{e^{ik\|x-x_{j}\|}}{\|x-x_{j}\|}\right)   
@@ -22,8 +29,7 @@
 %  -  zk: complex
 %        Helmholtz parameter, k
 %  -  srcinfo: structure
-%        structure containing sourceinfo
-%     
+%        structure containing the following info about the sources:
 %     *  srcinfo.sources: double(3,n)    
 %           source locations, $x_{j}$
 %     *  srcinfo.nd: integer
@@ -58,13 +64,13 @@
 %  
 %  Returns:
 %  
-%  -  U.pot: potential at source locations, if requested, $u(x_{j})$
-%  -  U.grad: gradient at source locations, if requested, $\nabla u(x_{j})$
-%  -  U.pottarg: potential at target locations, if requested, $u(t_{i})$
+%  -  U.pot:      potential at source locations, if requested, $u(x_{j})$
+%  -  U.grad:     gradient at source locations, if requested, $\nabla u(x_{j})$
+%  -  U.pottarg:  potential at target locations, if requested, $u(t_{i})$
 %  -  U.gradtarg: gradient at target locations, if requested, $\nabla u(t_{i})$
 %
-%  - ier: error code for fmm run
-%  - timeinfo: time taken in each step of the fmm
+%  - ier: error code for FMM run
+%  - timeinfo: time taken in each step of the FMM
 %       timeinfo(1): form multipole step
 %       timeinfo(2): multipole->multipole translation step
 %       timeinfo(3): multipole to local translation, form local + multipole eval step
@@ -89,8 +95,7 @@
 %     Call the FMM for sources only with default arguments, returns
 %     the error code for the FMM as well and the time split
 %      
- 
-
+% See also: H3DDIR
 
   sources = srcinfo.sources;
   [m,ns] = size(sources);
@@ -202,11 +207,13 @@ end
 
 % ---------------------------------------------------------------------
 @function [U] = h3ddir(zk,srcinfo,targ,pgt)
+% H3DDIR.   Direct (slow) 3D Helmholtz kernel sums (reference for HFMM3D)
 %
+%  U = h3ddir(zk,srcinfo,targ,pgt)
 %
 %  This subroutine computes the N-body Helmholtz
-%  interactions and its gradients in three dimensions where 
-%  the interaction kernel is given by $e^{ikr}/r$
+%  interactions and its gradients in three dimensions, where 
+%  the interaction kernel is given by $e^{ikr}/r$,
 % 
 %    u(x) = \sum_{j=1}^{N} c_{j} \frac{e^{ik\|x-x_{j}\|}}{\|x-x_{j}\|} - 
 %      v_{j} \cdot \nabla \left( \frac{e^{ik\|x-x_{j}\|}}{\|x-x_{j}\|}\right)   
@@ -217,15 +224,15 @@ end
 %  When $x=x_{j}$, the term corresponding to $x_{j}$ is dropped
 %  from the sum.
 %  
-%  The sum is evaluated directly - (slow code for testing)
-% 
+%  The sum is evaluated directly (slow code for testing).
+%  Note: currently no self-interactions, just sum at targets.
+%
 %  Args:
 %
 %  -  zk: complex
 %        Helmholtz parameter, k
 %  -  srcinfo: structure
-%        structure containing sourceinfo
-%     
+%        structure containing the following info about the sources:     
 %     *  srcinfo.sources: double(3,n)    
 %           source locations, $x_{j}$
 %     *  srcinfo.nd: integer
@@ -237,7 +244,6 @@ end
 %     *  srcinfo.dipoles: complex(nd,3,n) 
 %           dipole orientation vectors, $v_{j}$ (optional
 %           default - term corresponding to dipoles dropped) 
-%  
 %  -  targ: double(3,nt)
 %        target locations, $t_{i}$ 
 %  -  pgt: integer
@@ -249,8 +255,8 @@ end
 %  
 %  -  U.pottarg: potential at target locations, if requested, $u(t_{i})$
 %  -  U.gradtarg: gradient at target locations, if requested, $\nabla u(t_{i})$
- 
-
+%
+% See also: HFMM3D
 
   sources = srcinfo.sources;
   [m,ns] = size(sources);
@@ -325,16 +331,23 @@ end
 
 % ---------------------------------------------------------------------
 @function [U,varargout] = lfmm3d(eps,srcinfo,pg,varargin)
+% LFMM3D.  FMM in 3D for Laplace (electrostatic) kernels
 %
+%  [U,varargout] = lfmm3d(eps,srcinfo,pg,varargin)
+%
+%  Laplace FMM in R^3: evaluate all pairwise particle
+%  interactions (ignoring self-interactions) and possibly
+%  interactions with targets, using the fast multipole method
+%  with precision eps.
 %
 %  This subroutine computes the N-body Laplace
 %  interactions and its gradients in three dimensions where 
-%  the interaction kernel is given by $1/r$
+%  the interaction kernel is given by $1/r$, namely
 % 
 %    u(x) = \sum_{j=1}^{N} c_{j} \frac{1}{\|x-x_{j}\|} - 
 %      v_{j} \cdot \nabla \left( \frac{1}{\|x-x_{j}\|}\right)   
 %
-%  where $c_{j}$ are the charge densities
+%  where $c_{j}$ are the charge densities,
 %  $v_{j}$ are the dipole orientation vectors, and
 %  $x_{j}$ are the source locations.
 %  When $x=x_{j}$, the term corresponding to $x_{j}$ is dropped
@@ -345,8 +358,7 @@ end
 %  -  eps: double   
 %        precision requested
 %  -  srcinfo: structure
-%        structure containing sourceinfo
-%     
+%        structure containing the following info about the sources:     
 %     *  srcinfo.sources: double(3,n)    
 %           source locations, $x_{j}$
 %     *  srcinfo.nd: integer
@@ -358,7 +370,6 @@ end
 %     *  srcinfo.dipoles: double(nd,3,n) 
 %           dipole orientation vectors, $v_{j}$ (optional
 %           default - term corresponding to dipoles dropped) 
-%  
 %  -  pg: integer
 %        | source eval flag
 %        | potential at sources evaluated if pg = 1
@@ -383,15 +394,15 @@ end
 %  
 %  Returns:
 %  
-%  -  U.pot: potential at source locations, if requested, $u(x_{j})$
-%  -  U.grad: gradient at source locations, if requested, $\nabla u(x_{j})$
-%  -  U.hess: hessian at source locations, if requested, $\nabla^2 u(x_{j})$
-%  -  U.pottarg: potential at target locations, if requested, $u(t_{i})$
+%  -  U.pot:      potential at source locations, if requested, $u(x_{j})$
+%  -  U.grad:     gradient at source locations, if requested, $\nabla u(x_{j})$
+%  -  U.hess:     hessian at source locations, if requested, $\nabla^2 u(x_{j})$
+%  -  U.pottarg:  potential at target locations, if requested, $u(t_{i})$
 %  -  U.gradtarg: gradient at target locations, if requested, $\nabla u(t_{i})$
 %  -  U.hesstarg: hessian at target locations, if requested, $\nabla^2 u(t_{i})$
 %
-%  - ier: error code for fmm run
-%  - timeinfo: time taken in each step of the fmm
+%  - ier: error code for FMM run
+%  - timeinfo: time taken in each step of the FMM
 %       timeinfo(1): form multipole step
 %       timeinfo(2): multipole->multipole translation step
 %       timeinfo(3): multipole to local translation, form local + multipole eval step
@@ -416,7 +427,7 @@ end
 %     Call the FMM for sources only with default arguments, returns
 %     the error code for the FMM as well and the time split
 %      
-
+% See also: L3DDIR
 
 
   sources = srcinfo.sources;
@@ -531,28 +542,30 @@ end
 
 % ---------------------------------------------------------------------
 @function [U] = l3ddir(srcinfo,targ,pgt)
+% L3DDIR.   Direct (slow) 3D Laplace kernel sum (reference for LFMM3D)
 %
+% U = l3ddir(srcinfo,targ,pgt)
 %
 %  This subroutine computes the N-body Laplace
 %  interactions and its gradients in three dimensions where 
-%  the interaction kernel is given by $1/r$
+%  the interaction kernel is given by $1/r$, namely
 % 
 %    u(x) = \sum_{j=1}^{N} c_{j} \frac{1}{\|x-x_{j}\|} - 
 %      v_{j} \cdot \nabla \left( \frac{1}{\|x-x_{j}\|}\right)   
 %
-%  where $c_{j}$ are the charge densities
+%  where $c_{j}$ are the charge densities,
 %  $v_{j}$ are the dipole orientation vectors, and
 %  $x_{j}$ are the source locations.
 %  When $x=x_{j}$, the term corresponding to $x_{j}$ is dropped
 %  from the sum.
 % 
-%  The sum is evaluated directly - (slow code for testing)
+%  The sum is evaluated directly (slow code for testing).
+%  Note: currently no self-interactions, just sum at targets.
 %
 %  Args:
 %
 %  -  srcinfo: structure
-%        structure containing sourceinfo
-%     
+%        structure containing the following info about the sources:
 %     *  srcinfo.sources: double(3,n)    
 %           source locations, $x_{j}$
 %     *  srcinfo.nd: integer
@@ -564,7 +577,6 @@ end
 %     *  srcinfo.dipoles: double(nd,3,n) 
 %           dipole orientation vectors, $v_{j}$ (optional
 %           default - term corresponding to dipoles dropped) 
-%  
 %  -  targ: double(3,nt)
 %        target locations, $t_{i}$ (optional)
 %  -  pgt: integer
@@ -574,9 +586,11 @@ end
 %        | potential, gradient and hessian at targets evaluated if pgt=3
 %  
 %  Returns:
-%  -  U.pottarg: potential at target locations, if requested, $u(t_{i})$
+%  -  U.pottarg:  potential at target locations, if requested, $u(t_{i})$
 %  -  U.gradtarg: gradient at target locations, if requested, $\nabla u(t_{i})$
 %  -  U.hesstarg: hessian at target locations, if requested, $\nabla^2 u(t_{i})$
+%
+% See also: LFMM3D
 
   sources = srcinfo.sources;
   [m,ns] = size(sources);
@@ -668,13 +682,13 @@ end
 
 % ---------------------------------------------------------------------
 @function [U] = emfmm3d(eps,zk,srcinfo,targ,ifE,ifcurlE,ifdivE)
-% EMFMM3D.  FMM in 3D for Maxwell (electromagnetic) kernels
+% EMFMM3D.  FMM in 3D for Maxwell (frequency-domain electromagnetic) kernels
 %
 %  U = emfmm3d(eps,zk,srcinfo,targ,ifE,ifcurlE,ifdivE)
 %
-%  Maxwell FMM in R^3: evaluate all pairwise particle
+%  Frequency-domain Maxwell FMM in R^3: evaluate all pairwise particle
 %  interactions (ignoring self-interactions) and
-%  interactions with targs, using the fast multipole method
+%  interactions with targets, using the fast multipole method
 %  with precision eps.
 %
 %  This subroutine evaluates sums implied by the operator representation
@@ -697,8 +711,7 @@ end
 %  -  zk: complex
 %        Helmholtz parameter, k
 %  -  srcinfo: structure
-%        structure containing sourceinfo
-%     
+%        structure containing the following info about the sources:
 %     *  srcinfo.sources: double(3,n)    
 %           source locations, $x_{j}$
 %     *  srcinfo.nd: integer
@@ -834,7 +847,7 @@ end
 %  emfmm3d (see that function for all definitions), except that the first
 %  argument (eps) is absent.
 %
-%  See also: emfmm3d
+%  See also: EMFMM3D
 
   if(nargin<4)
     return;
@@ -1031,7 +1044,7 @@ end
 %  -  U.pretarg: pressure at target locations if requested
 %  -  U.gradtarg: gradient of velocity at target locations if requested
 %
-% See also: st3ddir
+% See also: ST3DDIR
 
   sources = srcinfo.sources;
   [m,ns] = size(sources);
@@ -1128,17 +1141,17 @@ end
 %  U = st3ddir(srcinfo,targ,ifppregtarg)
 %
 %  Stokes direct evaluation in R^3: evaluate all pairwise particle
-%  interactions (ignoring self-interactions) and
-%  interactions with targs. This is the slow O(N^2) direct code used
+%  interactions with targets. This is the slow O(N^2) direct code used
 %  as a reference for testing the (fast) code stfmm3d.
 %
 %  Kernel definitions, input and outputs arguments are identical to
 %  stfmm3d (see that function for all definitions), apart from:
 %  1) the first argument (eps) is absent.
-%  2) there are currently no outputs at sources (ie, as if ifppreg=0),
-%     meaning that U.pot, U.pre, and U.grad are missing.
+%  2) there are currently no outputs at sources, meaning that U.pot, U.pre,
+%     and U.grad are missing (as if ifppreg=0). In other words,
+%     just targets for now, and targ is thus not an optional argument.
 %
-% See also: stfmm3d
+% See also: STFMM3D
 
   sources = srcinfo.sources;
   [m,ns] = size(sources);

From 4a8a03dd7ea2cff731d3e4a6f3c1f8e68f13a37e Mon Sep 17 00:00:00 2001
From: ahbarnett <alex.h.barnett@gmail.com>
Date: Mon, 7 Aug 2023 16:35:28 -0400
Subject: [PATCH 07/14] add Contents.m for MATLAB/Octave docs

---
 matlab/Contents.m |  35 +++++++++++
 matlab/em3ddir.m  |  55 ++++--------------
 matlab/emfmm3d.m  |  33 +++++++----
 matlab/fmm3d.c    | 144 +++++++++++++++++++++++-----------------------
 matlab/fmm3d.mw   |  16 +++---
 matlab/h3ddir.m   |  19 +++---
 matlab/hfmm3d.m   |  27 +++++----
 matlab/l3ddir.m   |  17 +++---
 matlab/lfmm3d.m   |  29 ++++++----
 matlab/st3ddir.m  |  12 ++--
 matlab/stfmm3d.m  |   4 +-
 11 files changed, 210 insertions(+), 181 deletions(-)
 create mode 100644 matlab/Contents.m

diff --git a/matlab/Contents.m b/matlab/Contents.m
new file mode 100644
index 00000000..93568a79
--- /dev/null
+++ b/matlab/Contents.m
@@ -0,0 +1,35 @@
+% FMM3D: MATLAB/Octave wrappers to 3D fast multipole methods.
+%        Center for Computational Mathematics, Flatiron Institute.
+%
+% Functions:
+%   lfmm3d   - FMM in 3D for Laplace (electrostatic) kernels.
+%   l3ddir   - Direct (slow) 3D Laplace kernel sums (reference for LFMM3D).
+%   hfmm3d   - FMM in 3D for Helmholtz (acoustic frequency-domain) kernel.
+%   h3ddir   - Direct (slow) 3D Helmholtz kernel sums (reference for HFMM3D).
+%   emfmm3d  - FMM in 3D for Maxwell (frequency-domain electromagnetic) kernels.
+%   em3ddir  - Direct (slow) 3D Maxwell kernel sums (reference for EMFMM3D).
+%   stfmm3d  - FMM in 3D for Stokes (viscous fluid hydrodynamic) kernels.
+%   st3ddir  - Direct (slow) 3D Stokes kernel sums (reference for STFMM3D).
+%
+% For tester driver scripts see:
+%   test_lfmm3d
+%   test_hfmm3d
+%   test_emfmm3d
+%   test_stfmm3d
+%
+% For examples of use see:
+%   lfmm3d_example
+%   lfmm3d_big_example
+%   hfmm3d_example
+%   hfmm3d_big_example
+%   emfmm3d_example
+%   stfmm3d_example
+%   stfmm3d_simpletest
+%
+% Legacy codes/interfaces (from the Gimbutas-Greengard CMCL 2012 library):
+%   lfmm3dpart       - Laplace particle targ FMM in R^3.
+%   l3dpartdirect    - Laplace interactions in R^3, direct (slow) evaluation.
+%   hfmm3dpart       - Helmholtz particle targ FMM in R^3.
+%   h3dpartdirect    - Helmholtz interactions in R^3, direct (slow) evaluation.
+%   test_lfmm3dpart_direct - Test Laplace particle FMMs in R^3
+%   test_hfmm3dpart_direct - Test Helmholtz particle FMMs in R^3
diff --git a/matlab/em3ddir.m b/matlab/em3ddir.m
index 988b5997..d57a163c 100644
--- a/matlab/em3ddir.m
+++ b/matlab/em3ddir.m
@@ -1,51 +1,18 @@
 function [U] = em3ddir(zk,srcinfo,targ,ifE,ifcurlE,ifdivE)
+% EM3DDIR    Slow direct Maxwell kernel sums (reference for EMFMM3D).
 %
+% U = em3ddir(zk,srcinfo,targ,ifE,ifcurlE,ifdivE)
 %
-%  This subroutine computes
-%      E = curl S_{k}[h_current] + S_{k}[e_current] + grad S_{k}[e_charge]  -- (1)
-%  using the vector Helmholtz fmm.
-%  The subroutine also computes divE, curlE
-%  with appropriate flags
-%  Remark: the subroutine uses a stabilized representation
-%  for computing the divergence by using integration by parts
-%  wherever possible. If the divergence is not requested, then the
-%  helmholtz fmm is called with 3*nd densities, while if the divergence
-%  is requested, then the helmholtz fmm is calld with 4*nd densities
-% 
-%  Args:
+%  Maxwell direct evaluation in R^3: evaluate all pairwise particle
+%  interactions (ignoring self-interactions) and
+%  interactions with targs. This is the slow O(N^2) direct code used
+%  as a reference for testing the fast code emfmm3d.
 %
-%  -  zk: complex
-%        Helmholtz parameter, k
-%  -  srcinfo: structure
-%        structure containing sourceinfo
-%     
-%     *  srcinfo.sources: double(3,n)    
-%           source locations, $x_{j}$
-%     *  srcinfo.nd: integer
-%           number of charge/dipole vectors (optional, 
-%           default - nd = 1)
-%     *  srcinfo.h_current: complex(nd,3,n) 
-%           a vector source (optional,
-%           default - term corresponding to h_current dropped) 
-%     *  srcinfo.e_current: complex(nd,3,n) 
-%           b vector source (optional,
-%           default - term corresponding to e_current dropped) 
-%     *  srcinfo.e_charge: complex(nd,n) 
-%           e_charge source (optional, 
-%           default - term corresponding to e_charge dropped)
-%  -  targ: double(3,nt)
-%        target locations, $t_{i}$
-%  -  ifE: integer
-%        E is returned at the target locations if ifE = 1
-%  -  ifcurlE: integer
-%        curl E is returned at the target locations if ifcurlE = 1
-%  -  ifdivE: integer
-%        div E is returned at the target locations if ifdivE = 1
-%  Returns:
-%  
-%  -  U.E: E field defined in (1) above at target locations if requested
-%  -  U.curlE: curl of E field at target locations if requested
-%  -  U.divE: divergence of E at target locations if requested
+%  Kernel definitions, input and outputs arguments are identical to
+%  emfmm3d (see that function for all definitions), except that the first
+%  argument (eps) is absent.
+%
+%  See also: EMFMM3D
 
   if(nargin<4)
     return;
diff --git a/matlab/emfmm3d.m b/matlab/emfmm3d.m
index 3168be65..1d96c172 100644
--- a/matlab/emfmm3d.m
+++ b/matlab/emfmm3d.m
@@ -1,16 +1,25 @@
 function [U] = emfmm3d(eps,zk,srcinfo,targ,ifE,ifcurlE,ifdivE)
+% EMFMM3D   FMM in 3D for Maxwell (frequency-domain electromagnetic) kernels.
 %
+%  U = emfmm3d(eps,zk,srcinfo,targ,ifE,ifcurlE,ifdivE)
+%
+%  Frequency-domain Maxwell FMM in R^3: evaluate all pairwise particle
+%  interactions (ignoring self-interactions) and
+%  interactions with targets, using the fast multipole method
+%  with precision eps.
+%
+%  This subroutine evaluates sums implied by the operator representation
+%
+%      E = curl S_{k}[h_current] + S_{k}[e_current] + grad S_{k}[e_charge]
+%
+%  by calling the vector Helmholtz FMM.
+%  With appropriate input flags, the subroutine also computes divE, curlE.
 %
-%  This subroutine computes
-%      E = curl S_{k}[h_current] + S_{k}[e_current] + grad S_{k}[e_charge]  -- (1)
-%  using the vector Helmholtz fmm.
-%  The subroutine also computes divE, curlE
-%  with appropriate flags
 %  Remark: the subroutine uses a stabilized representation
 %  for computing the divergence by using integration by parts
 %  wherever possible. If the divergence is not requested, then the
-%  helmholtz fmm is called with 3*nd densities, while if the divergence
-%  is requested, then the helmholtz fmm is calld with 4*nd densities
+%  Helmholtz FMM is called with 3*nd densities, while if the divergence
+%  is requested, then the Helmholtz FMM is called with 4*nd densities
 % 
 %  Args:
 %
@@ -19,8 +28,7 @@
 %  -  zk: complex
 %        Helmholtz parameter, k
 %  -  srcinfo: structure
-%        structure containing sourceinfo
-%     
+%        structure containing the following info about the sources:
 %     *  srcinfo.sources: double(3,n)    
 %           source locations, $x_{j}$
 %     *  srcinfo.nd: integer
@@ -43,11 +51,14 @@
 %        curl E is returned at the target locations if ifcurlE = 1
 %  -  ifdivE: integer
 %        div E is returned at the target locations if ifdivE = 1
+%
 %  Returns:
 %  
-%  -  U.E: E field defined in (1) above at target locations if requested
+%  -  U.E:     E field defined in above equation at targets if requested
 %  -  U.curlE: curl of E field at target locations if requested
-%  -  U.divE: divergence of E at target locations if requested
+%  -  U.divE:  divergence of E at target locations if requested
+%
+% See also: HFMM3D, EM3DDIR
 
   if(nargin<5)
     return;
diff --git a/matlab/fmm3d.c b/matlab/fmm3d.c
index 7eeacbac..4847b085 100644
--- a/matlab/fmm3d.c
+++ b/matlab/fmm3d.c
@@ -1201,7 +1201,7 @@ MWF77_RETURN MWF77_st3ddirectstokstrsg(int*, double*, double*, int*, double*, do
 } /* end extern C */
 #endif
 
-/* ---- fmm3d.mw: 171 ----
+/* ---- fmm3d.mw: 176 ----
  * hndiv(double[1] eps, int[1] ns, int[1] nt, int[1] ifcharge, int[1] ifdipole, int[1] pg, int[1] pgt, inout int[1] ndiv, inout int[1] idivflag);
  */
 static const char* stubids1_ = "hndiv(i double[x], i int[x], i int[x], i int[x], i int[x], i int[x], i int[x], io int[x], io int[x])";
@@ -1355,7 +1355,7 @@ void mexStub1(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 186 ----
+/* ---- fmm3d.mw: 191 ----
  * hfmm3d_ndiv(int[1] nd, double[1] eps, dcomplex[1] zk, int[1] ns, double[3, ns] sources, int[1] ifcharge, dcomplex[nd, ns] charges, int[1] ifdipole, dcomplex[nd3, ns] dipoles, int[1] iper, int[1] pg, inout dcomplex[nd, ns] pot, inout dcomplex[nd3, ns] grad, inout dcomplex[nd6, ns] hess, int[1] nt, double[3, ntuse] targ, int[1] pgt, inout dcomplex[nd, ntuse] pottarg, inout dcomplex[nd3, ntuse] gradtarg, inout dcomplex[nd6, ntuse] hesstarg, int[1] ndiv, int[1] idivflag, int[1] ifnear, inout double[6] timeinfo, inout int[1] ier);
  */
 static const char* stubids2_ = "hfmm3d_ndiv(i int[x], i double[x], i dcomplex[x], i int[x], i double[xx], i int[x], i dcomplex[xx], i int[x], i dcomplex[xx], i int[x], i int[x], io dcomplex[xx], io dcomplex[xx], io dcomplex[xx], i int[x], i double[xx], i int[x], io dcomplex[xx], io dcomplex[xx], io dcomplex[xx], i int[x], i int[x], i int[x], io double[x], io int[x])";
@@ -1820,7 +1820,7 @@ void mexStub2(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 301 ----
+/* ---- fmm3d.mw: 307 ----
  * h3ddirectcp(int[1] nd, dcomplex[1] zk, double[3, ns] sources, dcomplex[nd, ns] charges, int[1] ns, double[3, nt] targ, int[1] nt, inout dcomplex[nd, nt] pottarg, double[1] thresh);
  */
 static const char* stubids3_ = "h3ddirectcp(i int[x], i dcomplex[x], i double[xx], i dcomplex[xx], i int[x], i double[xx], i int[x], io dcomplex[xx], i double[x])";
@@ -2005,7 +2005,7 @@ void mexStub3(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 304 ----
+/* ---- fmm3d.mw: 310 ----
  * h3ddirectdp(int[1] nd, dcomplex[1] zk, double[3, ns] sources, dcomplex[nd3, ns] dipoles, int[1] ns, double[3, nt] targ, int[1] nt, inout dcomplex[nd, nt] pottarg, double[1] thresh);
  */
 static const char* stubids4_ = "h3ddirectdp(i int[x], i dcomplex[x], i double[xx], i dcomplex[xx], i int[x], i double[xx], i int[x], io dcomplex[xx], i double[x])";
@@ -2190,7 +2190,7 @@ void mexStub4(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 307 ----
+/* ---- fmm3d.mw: 313 ----
  * h3ddirectcdp(int[1] nd, dcomplex[1] zk, double[3, ns] sources, dcomplex[nd, ns] charges, dcomplex[nd3, ns] dipoles, int[1] ns, double[3, nt] targ, int[1] nt, inout dcomplex[nd, nt] pottarg, double[1] thresh);
  */
 static const char* stubids5_ = "h3ddirectcdp(i int[x], i dcomplex[x], i double[xx], i dcomplex[xx], i dcomplex[xx], i int[x], i double[xx], i int[x], io dcomplex[xx], i double[x])";
@@ -2396,7 +2396,7 @@ void mexStub5(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 313 ----
+/* ---- fmm3d.mw: 319 ----
  * h3ddirectcg(int[1] nd, dcomplex[1] zk, double[3, ns] sources, dcomplex[nd, ns] charges, int[1] ns, double[3, nt] targ, int[1] nt, inout dcomplex[nd, nt] pottarg, inout dcomplex[nd3, nt] gradtarg, double[1] thresh);
  */
 static const char* stubids6_ = "h3ddirectcg(i int[x], i dcomplex[x], i double[xx], i dcomplex[xx], i int[x], i double[xx], i int[x], io dcomplex[xx], io dcomplex[xx], i double[x])";
@@ -2604,7 +2604,7 @@ void mexStub6(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 316 ----
+/* ---- fmm3d.mw: 322 ----
  * h3ddirectdg(int[1] nd, dcomplex[1] zk, double[3, ns] sources, dcomplex[nd3, ns] dipoles, int[1] ns, double[3, nt] targ, int[1] nt, inout dcomplex[nd, nt] pottarg, inout dcomplex[nd3, nt] gradtarg, double[1] thresh);
  */
 static const char* stubids7_ = "h3ddirectdg(i int[x], i dcomplex[x], i double[xx], i dcomplex[xx], i int[x], i double[xx], i int[x], io dcomplex[xx], io dcomplex[xx], i double[x])";
@@ -2812,7 +2812,7 @@ void mexStub7(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 319 ----
+/* ---- fmm3d.mw: 325 ----
  * h3ddirectcdg(int[1] nd, dcomplex[1] zk, double[3, ns] sources, dcomplex[nd, ns] charges, dcomplex[nd3, ns] dipoles, int[1] ns, double[3, nt] targ, int[1] nt, inout dcomplex[nd, nt] pottarg, inout dcomplex[nd3, nt] gradtarg, double[1] thresh);
  */
 static const char* stubids8_ = "h3ddirectcdg(i int[x], i dcomplex[x], i double[xx], i dcomplex[xx], i dcomplex[xx], i int[x], i double[xx], i int[x], io dcomplex[xx], io dcomplex[xx], i double[x])";
@@ -3041,7 +3041,7 @@ void mexStub8(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 498 ----
+/* ---- fmm3d.mw: 509 ----
  * lndiv(double[1] eps, int[1] ns, int[1] nt, int[1] ifcharge, int[1] ifdipole, int[1] pg, int[1] pgt, inout int[1] ndiv, inout int[1] idivflag);
  */
 static const char* stubids9_ = "lndiv(i double[x], i int[x], i int[x], i int[x], i int[x], i int[x], i int[x], io int[x], io int[x])";
@@ -3195,7 +3195,7 @@ void mexStub9(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 513 ----
+/* ---- fmm3d.mw: 524 ----
  * lfmm3d_ndiv(int[1] nd, double[1] eps, int[1] ns, double[3, ns] sources, int[1] ifcharge, double[nd, ns] charges, int[1] ifdipole, double[nd3, ns] dipoles, int[1] iper, int[1] pg, inout double[nd, ns] pot, inout double[nd3, ns] grad, inout double[nd6, ns] hess, int[1] nt, double[3, ntuse] targ, int[1] pgt, inout double[nd, ntuse] pottarg, inout double[nd3, ntuse] gradtarg, inout double[nd6, ntuse] hesstarg, int[1] ndiv, int[1] idivflag, int[1] ifnear, inout double[6] timeinfo, inout int[1] ier);
  */
 static const char* stubids10_ = "lfmm3d_ndiv(i int[x], i double[x], i int[x], i double[xx], i int[x], i double[xx], i int[x], i double[xx], i int[x], i int[x], io double[xx], io double[xx], io double[xx], i int[x], i double[xx], i int[x], io double[xx], io double[xx], io double[xx], i int[x], i int[x], i int[x], io double[x], io int[x])";
@@ -3627,7 +3627,7 @@ void mexStub10(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 630 ----
+/* ---- fmm3d.mw: 644 ----
  * l3ddirectcp(int[1] nd, double[3, ns] sources, double[nd, ns] charges, int[1] ns, double[3, nt] targ, int[1] nt, inout double[nd, nt] pottarg, double[1] thresh);
  */
 static const char* stubids11_ = "l3ddirectcp(i int[x], i double[xx], i double[xx], i int[x], i double[xx], i int[x], io double[xx], i double[x])";
@@ -3793,7 +3793,7 @@ void mexStub11(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 633 ----
+/* ---- fmm3d.mw: 647 ----
  * l3ddirectdp(int[1] nd, double[3, ns] sources, double[nd3, ns] dipoles, int[1] ns, double[3, nt] targ, int[1] nt, inout double[nd, nt] pottarg, double[1] thresh);
  */
 static const char* stubids12_ = "l3ddirectdp(i int[x], i double[xx], i double[xx], i int[x], i double[xx], i int[x], io double[xx], i double[x])";
@@ -3959,7 +3959,7 @@ void mexStub12(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 636 ----
+/* ---- fmm3d.mw: 650 ----
  * l3ddirectcdp(int[1] nd, double[3, ns] sources, double[nd, ns] charges, double[nd3, ns] dipoles, int[1] ns, double[3, nt] targ, int[1] nt, inout double[nd, nt] pottarg, double[1] thresh);
  */
 static const char* stubids13_ = "l3ddirectcdp(i int[x], i double[xx], i double[xx], i double[xx], i int[x], i double[xx], i int[x], io double[xx], i double[x])";
@@ -4147,7 +4147,7 @@ void mexStub13(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 642 ----
+/* ---- fmm3d.mw: 656 ----
  * l3ddirectcg(int[1] nd, double[3, ns] sources, double[nd, ns] charges, int[1] ns, double[3, nt] targ, int[1] nt, inout double[nd, nt] pottarg, inout double[nd3, nt] gradtarg, double[1] thresh);
  */
 static const char* stubids14_ = "l3ddirectcg(i int[x], i double[xx], i double[xx], i int[x], i double[xx], i int[x], io double[xx], io double[xx], i double[x])";
@@ -4333,7 +4333,7 @@ void mexStub14(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 645 ----
+/* ---- fmm3d.mw: 659 ----
  * l3ddirectdg(int[1] nd, double[3, ns] sources, double[nd3, ns] dipoles, int[1] ns, double[3, nt] targ, int[1] nt, inout double[nd, nt] pottarg, inout double[nd3, nt] gradtarg, double[1] thresh);
  */
 static const char* stubids15_ = "l3ddirectdg(i int[x], i double[xx], i double[xx], i int[x], i double[xx], i int[x], io double[xx], io double[xx], i double[x])";
@@ -4519,7 +4519,7 @@ void mexStub15(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 648 ----
+/* ---- fmm3d.mw: 662 ----
  * l3ddirectcdg(int[1] nd, double[3, ns] sources, double[nd, ns] charges, double[nd3, ns] dipoles, int[1] ns, double[3, nt] targ, int[1] nt, inout double[nd, nt] pottarg, inout double[nd3, nt] gradtarg, double[1] thresh);
  */
 static const char* stubids16_ = "l3ddirectcdg(i int[x], i double[xx], i double[xx], i double[xx], i int[x], i double[xx], i int[x], io double[xx], io double[xx], i double[x])";
@@ -4727,7 +4727,7 @@ void mexStub16(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 655 ----
+/* ---- fmm3d.mw: 669 ----
  * l3ddirectch(int[1] nd, double[3, ns] sources, double[nd, ns] charges, int[1] ns, double[3, nt] targ, int[1] nt, inout double[nd, nt] pottarg, inout double[nd3, nt] gradtarg, inout double[nd6, nt] hesstarg, double[1] thresh);
  */
 static const char* stubids17_ = "l3ddirectch(i int[x], i double[xx], i double[xx], i int[x], i double[xx], i int[x], io double[xx], io double[xx], io double[xx], i double[x])";
@@ -4933,7 +4933,7 @@ void mexStub17(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 658 ----
+/* ---- fmm3d.mw: 672 ----
  * l3ddirectdh(int[1] nd, double[3, ns] sources, double[nd3, ns] dipoles, int[1] ns, double[3, nt] targ, int[1] nt, inout double[nd, nt] pottarg, inout double[nd3, nt] gradtarg, inout double[nd6, nt] hesstarg, double[1] thresh);
  */
 static const char* stubids18_ = "l3ddirectdh(i int[x], i double[xx], i double[xx], i int[x], i double[xx], i int[x], io double[xx], io double[xx], io double[xx], i double[x])";
@@ -5139,7 +5139,7 @@ void mexStub18(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 661 ----
+/* ---- fmm3d.mw: 675 ----
  * l3ddirectcdh(int[1] nd, double[3, ns] sources, double[nd, ns] charges, double[nd3, ns] dipoles, int[1] ns, double[3, nt] targ, int[1] nt, inout double[nd, nt] pottarg, inout double[nd3, nt] gradtarg, inout double[nd6, nt] hesstarg, double[1] thresh);
  */
 static const char* stubids19_ = "l3ddirectcdh(i int[x], i double[xx], i double[xx], i double[xx], i int[x], i double[xx], i int[x], io double[xx], io double[xx], io double[xx], i double[x])";
@@ -5367,7 +5367,7 @@ void mexStub19(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 797 ----
+/* ---- fmm3d.mw: 822 ----
  * emfmm3d(int[1] nd, double[1] eps, dcomplex[1] zk, int[1] ns, double[3, ns] sources, int[1] ifh_current, dcomplex[nd3, ns_h_current] h_current, int[1] ife_current, dcomplex[nd3, ns_e_current] e_current, int[1] ife_charge, dcomplex[nd, ns_e_charge] e_charge, int[1] nt, double[3, nt] targ, int[1] ifE, inout dcomplex[nd3, nt_E] E, int[1] ifcurlE, inout dcomplex[nd3, nt_curlE] curlE, int[1] ifdivE, inout dcomplex[nd, nt_divE] divE, inout int[1] ier);
  */
 static const char* stubids20_ = "emfmm3d(i int[x], i double[x], i dcomplex[x], i int[x], i double[xx], i int[x], i dcomplex[xx], i int[x], i dcomplex[xx], i int[x], i dcomplex[xx], i int[x], i double[xx], i int[x], io dcomplex[xx], i int[x], io dcomplex[xx], i int[x], io dcomplex[xx], io int[x])";
@@ -5740,7 +5740,7 @@ void mexStub20(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 938 ----
+/* ---- fmm3d.mw: 930 ----
  * em3ddirect(int[1] nd, dcomplex[1] zk, int[1] ns, double[3, ns] sources, int[1] ifh_current, dcomplex[nd3, ns_h_current] h_current, int[1] ife_current, dcomplex[nd3, ns_e_current] e_current, int[1] ife_charge, dcomplex[nd, ns_e_charge] e_charge, int[1] nt, double[3, nt] targ, int[1] ifE, inout dcomplex[nd3, nt_E] E, int[1] ifcurlE, inout dcomplex[nd3, nt_curlE] curlE, int[1] ifdivE, inout dcomplex[nd, nt_divE] divE, double[1] thresh);
  */
 static const char* stubids21_ = "em3ddirect(i int[x], i dcomplex[x], i int[x], i double[xx], i int[x], i dcomplex[xx], i int[x], i dcomplex[xx], i int[x], i dcomplex[xx], i int[x], i double[xx], i int[x], io dcomplex[xx], i int[x], io dcomplex[xx], i int[x], io dcomplex[xx], i double[x])";
@@ -6097,7 +6097,7 @@ void mexStub21(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 1128 ----
+/* ---- fmm3d.mw: 1120 ----
  * stfmm3d(int[1] nd, double[1] eps, int[1] ns, double[3, ns] sources, int[1] ifstoklet, double[nd3, ns_stok] stoklet, int[1] ifstrslet, double[nd3, ns_strs] strslet, double[nd3, ns_strs] strsvec, int[1] ifppreg, inout double[nd3, ns_pot] pot, inout double[nd, ns_pre] pre, inout double[nd9, ns_grad] grad, int[1] nt, double[3, nt] targ, int[1] ifppregtarg, inout double[nd3, nt_pot] pottarg, inout double[nd, nt_pre] pretarg, inout double[nd9, nt_grad] gradtarg, inout int[1] ier);
  */
 static const char* stubids22_ = "stfmm3d(i int[x], i double[x], i int[x], i double[xx], i int[x], i double[xx], i int[x], i double[xx], i double[xx], i int[x], io double[xx], io double[xx], io double[xx], i int[x], i double[xx], i int[x], io double[xx], io double[xx], io double[xx], io int[x])";
@@ -6479,7 +6479,7 @@ void mexStub22(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 1223 ----
+/* ---- fmm3d.mw: 1217 ----
  * st3ddirectstokg(int[1] nd, double[3, ns] sources, double[nd3, ns_stok] stoklet, int[1] ns, double[3, nt] targ, int[1] nt, inout double[nd3, nt] pottarg, inout double[nd, nt] pretarg, inout double[nd9, nt] gradtarg, double[1] thresh);
  */
 static const char* stubids23_ = "st3ddirectstokg(i int[x], i double[xx], i double[xx], i int[x], i double[xx], i int[x], io double[xx], io double[xx], io double[xx], i double[x])";
@@ -6685,7 +6685,7 @@ void mexStub23(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 1226 ----
+/* ---- fmm3d.mw: 1220 ----
  * st3ddirectstokstrsg(int[1] nd, double[3, ns] sources, double[nd3, ns_stok] stoklet, int[1] istress, double[nd3, ns_strs] strslet, double[nd3, ns_strs] strsvec, int[1] ns, double[3, nt] targ, int[1] nt, inout double[nd3, nt] pottarg, inout double[nd, nt] pretarg, inout double[nd9, nt] gradtarg, double[1] thresh);
  */
 static const char* stubids24_ = "st3ddirectstokstrsg(i int[x], i double[xx], i double[xx], i int[x], i double[xx], i double[xx], i int[x], i double[xx], i int[x], io double[xx], io double[xx], io double[xx], i double[x])";
@@ -7025,30 +7025,30 @@ void mexFunction(int nlhs, mxArray* plhs[],
     } else if (strcmp(id, "*profile report*") == 0) {
         if (!mexprofrecord_)
             mexPrintf("Profiler inactive\n");
-        mexPrintf("%d calls to fmm3d.mw:171\n", mexprofrecord_[1]);
-        mexPrintf("%d calls to fmm3d.mw:186\n", mexprofrecord_[2]);
-        mexPrintf("%d calls to fmm3d.mw:301\n", mexprofrecord_[3]);
-        mexPrintf("%d calls to fmm3d.mw:304\n", mexprofrecord_[4]);
-        mexPrintf("%d calls to fmm3d.mw:307\n", mexprofrecord_[5]);
-        mexPrintf("%d calls to fmm3d.mw:313\n", mexprofrecord_[6]);
-        mexPrintf("%d calls to fmm3d.mw:316\n", mexprofrecord_[7]);
-        mexPrintf("%d calls to fmm3d.mw:319\n", mexprofrecord_[8]);
-        mexPrintf("%d calls to fmm3d.mw:498\n", mexprofrecord_[9]);
-        mexPrintf("%d calls to fmm3d.mw:513\n", mexprofrecord_[10]);
-        mexPrintf("%d calls to fmm3d.mw:630\n", mexprofrecord_[11]);
-        mexPrintf("%d calls to fmm3d.mw:633\n", mexprofrecord_[12]);
-        mexPrintf("%d calls to fmm3d.mw:636\n", mexprofrecord_[13]);
-        mexPrintf("%d calls to fmm3d.mw:642\n", mexprofrecord_[14]);
-        mexPrintf("%d calls to fmm3d.mw:645\n", mexprofrecord_[15]);
-        mexPrintf("%d calls to fmm3d.mw:648\n", mexprofrecord_[16]);
-        mexPrintf("%d calls to fmm3d.mw:655\n", mexprofrecord_[17]);
-        mexPrintf("%d calls to fmm3d.mw:658\n", mexprofrecord_[18]);
-        mexPrintf("%d calls to fmm3d.mw:661\n", mexprofrecord_[19]);
-        mexPrintf("%d calls to fmm3d.mw:797\n", mexprofrecord_[20]);
-        mexPrintf("%d calls to fmm3d.mw:938\n", mexprofrecord_[21]);
-        mexPrintf("%d calls to fmm3d.mw:1128\n", mexprofrecord_[22]);
-        mexPrintf("%d calls to fmm3d.mw:1223\n", mexprofrecord_[23]);
-        mexPrintf("%d calls to fmm3d.mw:1226\n", mexprofrecord_[24]);
+        mexPrintf("%d calls to fmm3d.mw:176\n", mexprofrecord_[1]);
+        mexPrintf("%d calls to fmm3d.mw:191\n", mexprofrecord_[2]);
+        mexPrintf("%d calls to fmm3d.mw:307\n", mexprofrecord_[3]);
+        mexPrintf("%d calls to fmm3d.mw:310\n", mexprofrecord_[4]);
+        mexPrintf("%d calls to fmm3d.mw:313\n", mexprofrecord_[5]);
+        mexPrintf("%d calls to fmm3d.mw:319\n", mexprofrecord_[6]);
+        mexPrintf("%d calls to fmm3d.mw:322\n", mexprofrecord_[7]);
+        mexPrintf("%d calls to fmm3d.mw:325\n", mexprofrecord_[8]);
+        mexPrintf("%d calls to fmm3d.mw:509\n", mexprofrecord_[9]);
+        mexPrintf("%d calls to fmm3d.mw:524\n", mexprofrecord_[10]);
+        mexPrintf("%d calls to fmm3d.mw:644\n", mexprofrecord_[11]);
+        mexPrintf("%d calls to fmm3d.mw:647\n", mexprofrecord_[12]);
+        mexPrintf("%d calls to fmm3d.mw:650\n", mexprofrecord_[13]);
+        mexPrintf("%d calls to fmm3d.mw:656\n", mexprofrecord_[14]);
+        mexPrintf("%d calls to fmm3d.mw:659\n", mexprofrecord_[15]);
+        mexPrintf("%d calls to fmm3d.mw:662\n", mexprofrecord_[16]);
+        mexPrintf("%d calls to fmm3d.mw:669\n", mexprofrecord_[17]);
+        mexPrintf("%d calls to fmm3d.mw:672\n", mexprofrecord_[18]);
+        mexPrintf("%d calls to fmm3d.mw:675\n", mexprofrecord_[19]);
+        mexPrintf("%d calls to fmm3d.mw:822\n", mexprofrecord_[20]);
+        mexPrintf("%d calls to fmm3d.mw:930\n", mexprofrecord_[21]);
+        mexPrintf("%d calls to fmm3d.mw:1120\n", mexprofrecord_[22]);
+        mexPrintf("%d calls to fmm3d.mw:1217\n", mexprofrecord_[23]);
+        mexPrintf("%d calls to fmm3d.mw:1220\n", mexprofrecord_[24]);
     } else if (strcmp(id, "*profile log*") == 0) {
         FILE* logfp;
         if (nrhs != 2 || mxGetString(prhs[1], id, sizeof(id)) != 0)
@@ -7058,30 +7058,30 @@ void mexFunction(int nlhs, mxArray* plhs[],
             mexErrMsgTxt("Cannot open log for output");
         if (!mexprofrecord_)
             fprintf(logfp, "Profiler inactive\n");
-        fprintf(logfp, "%d calls to fmm3d.mw:171\n", mexprofrecord_[1]);
-        fprintf(logfp, "%d calls to fmm3d.mw:186\n", mexprofrecord_[2]);
-        fprintf(logfp, "%d calls to fmm3d.mw:301\n", mexprofrecord_[3]);
-        fprintf(logfp, "%d calls to fmm3d.mw:304\n", mexprofrecord_[4]);
-        fprintf(logfp, "%d calls to fmm3d.mw:307\n", mexprofrecord_[5]);
-        fprintf(logfp, "%d calls to fmm3d.mw:313\n", mexprofrecord_[6]);
-        fprintf(logfp, "%d calls to fmm3d.mw:316\n", mexprofrecord_[7]);
-        fprintf(logfp, "%d calls to fmm3d.mw:319\n", mexprofrecord_[8]);
-        fprintf(logfp, "%d calls to fmm3d.mw:498\n", mexprofrecord_[9]);
-        fprintf(logfp, "%d calls to fmm3d.mw:513\n", mexprofrecord_[10]);
-        fprintf(logfp, "%d calls to fmm3d.mw:630\n", mexprofrecord_[11]);
-        fprintf(logfp, "%d calls to fmm3d.mw:633\n", mexprofrecord_[12]);
-        fprintf(logfp, "%d calls to fmm3d.mw:636\n", mexprofrecord_[13]);
-        fprintf(logfp, "%d calls to fmm3d.mw:642\n", mexprofrecord_[14]);
-        fprintf(logfp, "%d calls to fmm3d.mw:645\n", mexprofrecord_[15]);
-        fprintf(logfp, "%d calls to fmm3d.mw:648\n", mexprofrecord_[16]);
-        fprintf(logfp, "%d calls to fmm3d.mw:655\n", mexprofrecord_[17]);
-        fprintf(logfp, "%d calls to fmm3d.mw:658\n", mexprofrecord_[18]);
-        fprintf(logfp, "%d calls to fmm3d.mw:661\n", mexprofrecord_[19]);
-        fprintf(logfp, "%d calls to fmm3d.mw:797\n", mexprofrecord_[20]);
-        fprintf(logfp, "%d calls to fmm3d.mw:938\n", mexprofrecord_[21]);
-        fprintf(logfp, "%d calls to fmm3d.mw:1128\n", mexprofrecord_[22]);
-        fprintf(logfp, "%d calls to fmm3d.mw:1223\n", mexprofrecord_[23]);
-        fprintf(logfp, "%d calls to fmm3d.mw:1226\n", mexprofrecord_[24]);
+        fprintf(logfp, "%d calls to fmm3d.mw:176\n", mexprofrecord_[1]);
+        fprintf(logfp, "%d calls to fmm3d.mw:191\n", mexprofrecord_[2]);
+        fprintf(logfp, "%d calls to fmm3d.mw:307\n", mexprofrecord_[3]);
+        fprintf(logfp, "%d calls to fmm3d.mw:310\n", mexprofrecord_[4]);
+        fprintf(logfp, "%d calls to fmm3d.mw:313\n", mexprofrecord_[5]);
+        fprintf(logfp, "%d calls to fmm3d.mw:319\n", mexprofrecord_[6]);
+        fprintf(logfp, "%d calls to fmm3d.mw:322\n", mexprofrecord_[7]);
+        fprintf(logfp, "%d calls to fmm3d.mw:325\n", mexprofrecord_[8]);
+        fprintf(logfp, "%d calls to fmm3d.mw:509\n", mexprofrecord_[9]);
+        fprintf(logfp, "%d calls to fmm3d.mw:524\n", mexprofrecord_[10]);
+        fprintf(logfp, "%d calls to fmm3d.mw:644\n", mexprofrecord_[11]);
+        fprintf(logfp, "%d calls to fmm3d.mw:647\n", mexprofrecord_[12]);
+        fprintf(logfp, "%d calls to fmm3d.mw:650\n", mexprofrecord_[13]);
+        fprintf(logfp, "%d calls to fmm3d.mw:656\n", mexprofrecord_[14]);
+        fprintf(logfp, "%d calls to fmm3d.mw:659\n", mexprofrecord_[15]);
+        fprintf(logfp, "%d calls to fmm3d.mw:662\n", mexprofrecord_[16]);
+        fprintf(logfp, "%d calls to fmm3d.mw:669\n", mexprofrecord_[17]);
+        fprintf(logfp, "%d calls to fmm3d.mw:672\n", mexprofrecord_[18]);
+        fprintf(logfp, "%d calls to fmm3d.mw:675\n", mexprofrecord_[19]);
+        fprintf(logfp, "%d calls to fmm3d.mw:822\n", mexprofrecord_[20]);
+        fprintf(logfp, "%d calls to fmm3d.mw:930\n", mexprofrecord_[21]);
+        fprintf(logfp, "%d calls to fmm3d.mw:1120\n", mexprofrecord_[22]);
+        fprintf(logfp, "%d calls to fmm3d.mw:1217\n", mexprofrecord_[23]);
+        fprintf(logfp, "%d calls to fmm3d.mw:1220\n", mexprofrecord_[24]);
         fclose(logfp);
     } else
         mexErrMsgTxt("Unknown identifier");
diff --git a/matlab/fmm3d.mw b/matlab/fmm3d.mw
index d6a21ab9..ee61da5a 100644
--- a/matlab/fmm3d.mw
+++ b/matlab/fmm3d.mw
@@ -1,6 +1,6 @@
 % ---------------------------------------------------------------------
 @function [U,varargout] = hfmm3d(eps,zk,srcinfo,pg,varargin)
-% HFMM3D.  FMM in 3D for Helmholtz (acoustic frequency-domain) kernel
+% HFMM3D    FMM in 3D for Helmholtz (acoustic frequency-domain) kernel.
 %
 % [U,varargout] = hfmm3d(eps,zk,srcinfo,pg,varargin)
 %
@@ -207,7 +207,7 @@ end
 
 % ---------------------------------------------------------------------
 @function [U] = h3ddir(zk,srcinfo,targ,pgt)
-% H3DDIR.   Direct (slow) 3D Helmholtz kernel sums (reference for HFMM3D)
+% H3DDIR   Direct (slow) 3D Helmholtz kernel sums (reference for HFMM3D).
 %
 %  U = h3ddir(zk,srcinfo,targ,pgt)
 %
@@ -331,7 +331,7 @@ end
 
 % ---------------------------------------------------------------------
 @function [U,varargout] = lfmm3d(eps,srcinfo,pg,varargin)
-% LFMM3D.  FMM in 3D for Laplace (electrostatic) kernels
+% LFMM3D    FMM in 3D for Laplace (electrostatic) kernels.
 %
 %  [U,varargout] = lfmm3d(eps,srcinfo,pg,varargin)
 %
@@ -542,7 +542,7 @@ end
 
 % ---------------------------------------------------------------------
 @function [U] = l3ddir(srcinfo,targ,pgt)
-% L3DDIR.   Direct (slow) 3D Laplace kernel sum (reference for LFMM3D)
+% L3DDIR    Direct (slow) 3D Laplace kernel sums (reference for LFMM3D).
 %
 % U = l3ddir(srcinfo,targ,pgt)
 %
@@ -682,7 +682,7 @@ end
 
 % ---------------------------------------------------------------------
 @function [U] = emfmm3d(eps,zk,srcinfo,targ,ifE,ifcurlE,ifdivE)
-% EMFMM3D.  FMM in 3D for Maxwell (frequency-domain electromagnetic) kernels
+% EMFMM3D   FMM in 3D for Maxwell (frequency-domain electromagnetic) kernels.
 %
 %  U = emfmm3d(eps,zk,srcinfo,targ,ifE,ifcurlE,ifdivE)
 %
@@ -834,7 +834,7 @@ end
 
 % ---------------------------------------------------------------------
 @function [U] = em3ddir(zk,srcinfo,targ,ifE,ifcurlE,ifdivE)
-% EM3DDIR.  Slow direct Maxwell kernel evaluations (reference for EMFMM3D)
+% EM3DDIR    Slow direct Maxwell kernel sums (reference for EMFMM3D).
 %
 % U = em3ddir(zk,srcinfo,targ,ifE,ifcurlE,ifdivE)
 %
@@ -943,7 +943,7 @@ end
 
 % ---------------------------------------------------------------------
 @function [U] = stfmm3d(eps,srcinfo,ifppreg,targ,ifppregtarg)
-% STFMM3D.  FMM in 3D for Stokes (viscous fluid hydrodynamic) kernels
+% STFMM3D    FMM in 3D for Stokes (viscous fluid hydrodynamic) kernels.
 %
 % U = stfmm3d(eps,srcinfo,ifppreg,targ,ifppregtarg)
 %
@@ -1136,7 +1136,7 @@ end
 
 % ---------------------------------------------------------------------
 @function [U] = st3ddir(srcinfo,targ,ifppregtarg)
-% ST3DDIR.  Stokes 3D direct slow kernel evaluation (reference for STFMM3D)
+% ST3DDIR    Direct (slow) 3D Stokes kernel sums (reference for STFMM3D).
 %
 %  U = st3ddir(srcinfo,targ,ifppregtarg)
 %
diff --git a/matlab/h3ddir.m b/matlab/h3ddir.m
index 14837487..cdcce8fa 100644
--- a/matlab/h3ddir.m
+++ b/matlab/h3ddir.m
@@ -1,9 +1,11 @@
 function [U] = h3ddir(zk,srcinfo,targ,pgt)
+% H3DDIR   Direct (slow) 3D Helmholtz kernel sums (reference for HFMM3D).
 %
+%  U = h3ddir(zk,srcinfo,targ,pgt)
 %
 %  This subroutine computes the N-body Helmholtz
-%  interactions and its gradients in three dimensions where 
-%  the interaction kernel is given by $e^{ikr}/r$
+%  interactions and its gradients in three dimensions, where 
+%  the interaction kernel is given by $e^{ikr}/r$,
 % 
 %    u(x) = \sum_{j=1}^{N} c_{j} \frac{e^{ik\|x-x_{j}\|}}{\|x-x_{j}\|} - 
 %      v_{j} \cdot \nabla \left( \frac{e^{ik\|x-x_{j}\|}}{\|x-x_{j}\|}\right)   
@@ -14,15 +16,15 @@
 %  When $x=x_{j}$, the term corresponding to $x_{j}$ is dropped
 %  from the sum.
 %  
-%  The sum is evaluated directly - (slow code for testing)
-% 
+%  The sum is evaluated directly (slow code for testing).
+%  Note: currently no self-interactions, just sum at targets.
+%
 %  Args:
 %
 %  -  zk: complex
 %        Helmholtz parameter, k
 %  -  srcinfo: structure
-%        structure containing sourceinfo
-%     
+%        structure containing the following info about the sources:     
 %     *  srcinfo.sources: double(3,n)    
 %           source locations, $x_{j}$
 %     *  srcinfo.nd: integer
@@ -34,7 +36,6 @@
 %     *  srcinfo.dipoles: complex(nd,3,n) 
 %           dipole orientation vectors, $v_{j}$ (optional
 %           default - term corresponding to dipoles dropped) 
-%  
 %  -  targ: double(3,nt)
 %        target locations, $t_{i}$ 
 %  -  pgt: integer
@@ -46,8 +47,8 @@
 %  
 %  -  U.pottarg: potential at target locations, if requested, $u(t_{i})$
 %  -  U.gradtarg: gradient at target locations, if requested, $\nabla u(t_{i})$
- 
-
+%
+% See also: HFMM3D
 
   sources = srcinfo.sources;
   [m,ns] = size(sources);
diff --git a/matlab/hfmm3d.m b/matlab/hfmm3d.m
index d10dd050..ae57947d 100644
--- a/matlab/hfmm3d.m
+++ b/matlab/hfmm3d.m
@@ -1,9 +1,16 @@
 function [U,varargout] = hfmm3d(eps,zk,srcinfo,pg,varargin)
+% HFMM3D    FMM in 3D for Helmholtz (acoustic frequency-domain) kernel.
 %
+% [U,varargout] = hfmm3d(eps,zk,srcinfo,pg,varargin)
+%
+%  Helmholtz FMM in R^3: evaluate all pairwise particle
+%  interactions (ignoring self-interactions) and possibly
+%  interactions with targets, using the fast multipole method
+%  with precision eps.
 %
 %  This subroutine computes the N-body Helmholtz
-%  interactions and its gradients in three dimensions where 
-%  the interaction kernel is given by $e^{ikr}/r$
+%  interactions and its gradients in three dimensions, where 
+%  the interaction kernel is given by $e^{ikr}/r$,
 % 
 %    u(x) = \sum_{j=1}^{N} c_{j} \frac{e^{ik\|x-x_{j}\|}}{\|x-x_{j}\|} - 
 %      v_{j} \cdot \nabla \left( \frac{e^{ik\|x-x_{j}\|}}{\|x-x_{j}\|}\right)   
@@ -21,8 +28,7 @@
 %  -  zk: complex
 %        Helmholtz parameter, k
 %  -  srcinfo: structure
-%        structure containing sourceinfo
-%     
+%        structure containing the following info about the sources:
 %     *  srcinfo.sources: double(3,n)    
 %           source locations, $x_{j}$
 %     *  srcinfo.nd: integer
@@ -57,13 +63,13 @@
 %  
 %  Returns:
 %  
-%  -  U.pot: potential at source locations, if requested, $u(x_{j})$
-%  -  U.grad: gradient at source locations, if requested, $\nabla u(x_{j})$
-%  -  U.pottarg: potential at target locations, if requested, $u(t_{i})$
+%  -  U.pot:      potential at source locations, if requested, $u(x_{j})$
+%  -  U.grad:     gradient at source locations, if requested, $\nabla u(x_{j})$
+%  -  U.pottarg:  potential at target locations, if requested, $u(t_{i})$
 %  -  U.gradtarg: gradient at target locations, if requested, $\nabla u(t_{i})$
 %
-%  - ier: error code for fmm run
-%  - timeinfo: time taken in each step of the fmm
+%  - ier: error code for FMM run
+%  - timeinfo: time taken in each step of the FMM
 %       timeinfo(1): form multipole step
 %       timeinfo(2): multipole->multipole translation step
 %       timeinfo(3): multipole to local translation, form local + multipole eval step
@@ -88,8 +94,7 @@
 %     Call the FMM for sources only with default arguments, returns
 %     the error code for the FMM as well and the time split
 %      
- 
-
+% See also: H3DDIR
 
   sources = srcinfo.sources;
   [m,ns] = size(sources);
diff --git a/matlab/l3ddir.m b/matlab/l3ddir.m
index 70afaaf9..314029ab 100644
--- a/matlab/l3ddir.m
+++ b/matlab/l3ddir.m
@@ -1,26 +1,28 @@
 function [U] = l3ddir(srcinfo,targ,pgt)
+% L3DDIR    Direct (slow) 3D Laplace kernel sums (reference for LFMM3D).
 %
+% U = l3ddir(srcinfo,targ,pgt)
 %
 %  This subroutine computes the N-body Laplace
 %  interactions and its gradients in three dimensions where 
-%  the interaction kernel is given by $1/r$
+%  the interaction kernel is given by $1/r$, namely
 % 
 %    u(x) = \sum_{j=1}^{N} c_{j} \frac{1}{\|x-x_{j}\|} - 
 %      v_{j} \cdot \nabla \left( \frac{1}{\|x-x_{j}\|}\right)   
 %
-%  where $c_{j}$ are the charge densities
+%  where $c_{j}$ are the charge densities,
 %  $v_{j}$ are the dipole orientation vectors, and
 %  $x_{j}$ are the source locations.
 %  When $x=x_{j}$, the term corresponding to $x_{j}$ is dropped
 %  from the sum.
 % 
-%  The sum is evaluated directly - (slow code for testing)
+%  The sum is evaluated directly (slow code for testing).
+%  Note: currently no self-interactions, just sum at targets.
 %
 %  Args:
 %
 %  -  srcinfo: structure
-%        structure containing sourceinfo
-%     
+%        structure containing the following info about the sources:
 %     *  srcinfo.sources: double(3,n)    
 %           source locations, $x_{j}$
 %     *  srcinfo.nd: integer
@@ -32,7 +34,6 @@
 %     *  srcinfo.dipoles: double(nd,3,n) 
 %           dipole orientation vectors, $v_{j}$ (optional
 %           default - term corresponding to dipoles dropped) 
-%  
 %  -  targ: double(3,nt)
 %        target locations, $t_{i}$ (optional)
 %  -  pgt: integer
@@ -42,9 +43,11 @@
 %        | potential, gradient and hessian at targets evaluated if pgt=3
 %  
 %  Returns:
-%  -  U.pottarg: potential at target locations, if requested, $u(t_{i})$
+%  -  U.pottarg:  potential at target locations, if requested, $u(t_{i})$
 %  -  U.gradtarg: gradient at target locations, if requested, $\nabla u(t_{i})$
 %  -  U.hesstarg: hessian at target locations, if requested, $\nabla^2 u(t_{i})$
+%
+% See also: LFMM3D
 
   sources = srcinfo.sources;
   [m,ns] = size(sources);
diff --git a/matlab/lfmm3d.m b/matlab/lfmm3d.m
index 6c9b2ed1..e24115e6 100644
--- a/matlab/lfmm3d.m
+++ b/matlab/lfmm3d.m
@@ -1,14 +1,21 @@
 function [U,varargout] = lfmm3d(eps,srcinfo,pg,varargin)
+% LFMM3D    FMM in 3D for Laplace (electrostatic) kernels.
 %
+%  [U,varargout] = lfmm3d(eps,srcinfo,pg,varargin)
+%
+%  Laplace FMM in R^3: evaluate all pairwise particle
+%  interactions (ignoring self-interactions) and possibly
+%  interactions with targets, using the fast multipole method
+%  with precision eps.
 %
 %  This subroutine computes the N-body Laplace
 %  interactions and its gradients in three dimensions where 
-%  the interaction kernel is given by $1/r$
+%  the interaction kernel is given by $1/r$, namely
 % 
 %    u(x) = \sum_{j=1}^{N} c_{j} \frac{1}{\|x-x_{j}\|} - 
 %      v_{j} \cdot \nabla \left( \frac{1}{\|x-x_{j}\|}\right)   
 %
-%  where $c_{j}$ are the charge densities
+%  where $c_{j}$ are the charge densities,
 %  $v_{j}$ are the dipole orientation vectors, and
 %  $x_{j}$ are the source locations.
 %  When $x=x_{j}$, the term corresponding to $x_{j}$ is dropped
@@ -19,8 +26,7 @@
 %  -  eps: double   
 %        precision requested
 %  -  srcinfo: structure
-%        structure containing sourceinfo
-%     
+%        structure containing the following info about the sources:     
 %     *  srcinfo.sources: double(3,n)    
 %           source locations, $x_{j}$
 %     *  srcinfo.nd: integer
@@ -32,7 +38,6 @@
 %     *  srcinfo.dipoles: double(nd,3,n) 
 %           dipole orientation vectors, $v_{j}$ (optional
 %           default - term corresponding to dipoles dropped) 
-%  
 %  -  pg: integer
 %        | source eval flag
 %        | potential at sources evaluated if pg = 1
@@ -57,15 +62,15 @@
 %  
 %  Returns:
 %  
-%  -  U.pot: potential at source locations, if requested, $u(x_{j})$
-%  -  U.grad: gradient at source locations, if requested, $\nabla u(x_{j})$
-%  -  U.hess: hessian at source locations, if requested, $\nabla^2 u(x_{j})$
-%  -  U.pottarg: potential at target locations, if requested, $u(t_{i})$
+%  -  U.pot:      potential at source locations, if requested, $u(x_{j})$
+%  -  U.grad:     gradient at source locations, if requested, $\nabla u(x_{j})$
+%  -  U.hess:     hessian at source locations, if requested, $\nabla^2 u(x_{j})$
+%  -  U.pottarg:  potential at target locations, if requested, $u(t_{i})$
 %  -  U.gradtarg: gradient at target locations, if requested, $\nabla u(t_{i})$
 %  -  U.hesstarg: hessian at target locations, if requested, $\nabla^2 u(t_{i})$
 %
-%  - ier: error code for fmm run
-%  - timeinfo: time taken in each step of the fmm
+%  - ier: error code for FMM run
+%  - timeinfo: time taken in each step of the FMM
 %       timeinfo(1): form multipole step
 %       timeinfo(2): multipole->multipole translation step
 %       timeinfo(3): multipole to local translation, form local + multipole eval step
@@ -90,7 +95,7 @@
 %     Call the FMM for sources only with default arguments, returns
 %     the error code for the FMM as well and the time split
 %      
-
+% See also: L3DDIR
 
 
   sources = srcinfo.sources;
diff --git a/matlab/st3ddir.m b/matlab/st3ddir.m
index b1edaef5..1589cdb5 100644
--- a/matlab/st3ddir.m
+++ b/matlab/st3ddir.m
@@ -1,18 +1,20 @@
 function [U] = st3ddir(srcinfo,targ,ifppregtarg)
-% ST3DDIR - Stokes 3D direct slow kernel evaluation (tester for STFMM3D) 
+% ST3DDIR    Direct (slow) 3D Stokes kernel sums (reference for STFMM3D).
 %
 %  U = st3ddir(srcinfo,targ,ifppregtarg)
 %
 %  Stokes direct evaluation in R^3: evaluate all pairwise particle
-%  interactions (ignoring self-interactions) and
-%  interactions with targs. This is the slow O(N^2) direct code used
+%  interactions with targets. This is the slow O(N^2) direct code used
 %  as a reference for testing the (fast) code stfmm3d.
 %
 %  Kernel definitions, input and outputs arguments are identical to
 %  stfmm3d (see that function for all definitions), apart from:
-%  1) there are currently no outputs at sources (ie, as if ifppreg=0)
+%  1) the first argument (eps) is absent.
+%  2) there are currently no outputs at sources, meaning that U.pot, U.pre,
+%     and U.grad are missing (as if ifppreg=0). In other words,
+%     just targets for now, and targ is thus not an optional argument.
 %
-% See also: stfmm3d
+% See also: STFMM3D
 
   sources = srcinfo.sources;
   [m,ns] = size(sources);
diff --git a/matlab/stfmm3d.m b/matlab/stfmm3d.m
index e11acdd2..c59afc6f 100644
--- a/matlab/stfmm3d.m
+++ b/matlab/stfmm3d.m
@@ -1,5 +1,5 @@
 function [U] = stfmm3d(eps,srcinfo,ifppreg,targ,ifppregtarg)
-% STFMM3D - Stokes 3D FMM
+% STFMM3D    FMM in 3D for Stokes (viscous fluid hydrodynamic) kernels.
 %
 % U = stfmm3d(eps,srcinfo,ifppreg,targ,ifppregtarg)
 %
@@ -100,7 +100,7 @@
 %  -  U.pretarg: pressure at target locations if requested
 %  -  U.gradtarg: gradient of velocity at target locations if requested
 %
-% See also: st3ddir
+% See also: ST3DDIR
 
   sources = srcinfo.sources;
   [m,ns] = size(sources);

From 1aebdcbef64a8f1de4e35f32d39c6c6d52ef698e Mon Sep 17 00:00:00 2001
From: Travis Askham <askhamwhat@gmail.com>
Date: Tue, 8 Aug 2023 10:45:38 -0400
Subject: [PATCH 08/14] make h3ddir and l3ddir consistent with st3ddir, cut
 down repeated doc

---
 matlab/h3ddir.m | 52 +++++++++----------------------------------------
 matlab/l3ddir.m | 51 +++++++++---------------------------------------
 2 files changed, 18 insertions(+), 85 deletions(-)

diff --git a/matlab/h3ddir.m b/matlab/h3ddir.m
index cdcce8fa..d9b7f537 100644
--- a/matlab/h3ddir.m
+++ b/matlab/h3ddir.m
@@ -3,50 +3,16 @@
 %
 %  U = h3ddir(zk,srcinfo,targ,pgt)
 %
-%  This subroutine computes the N-body Helmholtz
-%  interactions and its gradients in three dimensions, where 
-%  the interaction kernel is given by $e^{ikr}/r$,
-% 
-%    u(x) = \sum_{j=1}^{N} c_{j} \frac{e^{ik\|x-x_{j}\|}}{\|x-x_{j}\|} - 
-%      v_{j} \cdot \nabla \left( \frac{e^{ik\|x-x_{j}\|}}{\|x-x_{j}\|}\right)   
+%  Helmholtz direct evaluation in R^3: evaluate all pairwise particle
+%  interactions with targets. This is the slow O(N^2) direct code used
+%  as a reference for testing the (fast) code hfmm3d.
 %
-%  where $c_{j}$ are the charge densities
-%  $v_{j}$ are the dipole orientation vectors, and
-%  $x_{j}$ are the source locations.
-%  When $x=x_{j}$, the term corresponding to $x_{j}$ is dropped
-%  from the sum.
-%  
-%  The sum is evaluated directly (slow code for testing).
-%  Note: currently no self-interactions, just sum at targets.
-%
-%  Args:
-%
-%  -  zk: complex
-%        Helmholtz parameter, k
-%  -  srcinfo: structure
-%        structure containing the following info about the sources:     
-%     *  srcinfo.sources: double(3,n)    
-%           source locations, $x_{j}$
-%     *  srcinfo.nd: integer
-%           number of charge/dipole vectors (optional, 
-%           default - nd = 1)
-%     *  srcinfo.charges: complex(nd,n) 
-%           charge densities, $c_{j}$ (optional, 
-%           default - term corresponding to charges dropped)
-%     *  srcinfo.dipoles: complex(nd,3,n) 
-%           dipole orientation vectors, $v_{j}$ (optional
-%           default - term corresponding to dipoles dropped) 
-%  -  targ: double(3,nt)
-%        target locations, $t_{i}$ 
-%  -  pgt: integer
-%        | target eval flag 
-%        | potential at targets evaluated if pgt = 1
-%        | potential and gradient at targets evaluated if pgt=2  
-%  
-%  Returns:
-%  
-%  -  U.pottarg: potential at target locations, if requested, $u(t_{i})$
-%  -  U.gradtarg: gradient at target locations, if requested, $\nabla u(t_{i})$
+%  Kernel definitions, input and outputs arguments are identical to
+%  hfmm3d (see that function for all definitions), apart from:
+%  1) the first argument (eps) is absent.
+%  2) there are currently no outputs at sources, meaning that U.pot
+%     and U.grad are missing (as if pg=0). In other words,
+%     just targets for now, and targ is thus not an optional argument.
 %
 % See also: HFMM3D
 
diff --git a/matlab/l3ddir.m b/matlab/l3ddir.m
index 314029ab..858f1908 100644
--- a/matlab/l3ddir.m
+++ b/matlab/l3ddir.m
@@ -3,49 +3,16 @@
 %
 % U = l3ddir(srcinfo,targ,pgt)
 %
-%  This subroutine computes the N-body Laplace
-%  interactions and its gradients in three dimensions where 
-%  the interaction kernel is given by $1/r$, namely
-% 
-%    u(x) = \sum_{j=1}^{N} c_{j} \frac{1}{\|x-x_{j}\|} - 
-%      v_{j} \cdot \nabla \left( \frac{1}{\|x-x_{j}\|}\right)   
+%  Laplace direct evaluation in R^3: evaluate all pairwise particle
+%  interactions with targets. This is the slow O(N^2) direct code used
+%  as a reference for testing the (fast) code lfmm3d.
 %
-%  where $c_{j}$ are the charge densities,
-%  $v_{j}$ are the dipole orientation vectors, and
-%  $x_{j}$ are the source locations.
-%  When $x=x_{j}$, the term corresponding to $x_{j}$ is dropped
-%  from the sum.
-% 
-%  The sum is evaluated directly (slow code for testing).
-%  Note: currently no self-interactions, just sum at targets.
-%
-%  Args:
-%
-%  -  srcinfo: structure
-%        structure containing the following info about the sources:
-%     *  srcinfo.sources: double(3,n)    
-%           source locations, $x_{j}$
-%     *  srcinfo.nd: integer
-%           number of charge/dipole vectors (optional, 
-%           default - nd = 1)
-%     *  srcinfo.charges: double(nd,n) 
-%           charge densities, $c_{j}$ (optional, 
-%           default - term corresponding to charges dropped)
-%     *  srcinfo.dipoles: double(nd,3,n) 
-%           dipole orientation vectors, $v_{j}$ (optional
-%           default - term corresponding to dipoles dropped) 
-%  -  targ: double(3,nt)
-%        target locations, $t_{i}$ (optional)
-%  -  pgt: integer
-%        | target eval flag (optional)
-%        | potential at targets evaluated if pgt = 1
-%        | potential and gradient at targets evaluated if pgt=2 
-%        | potential, gradient and hessian at targets evaluated if pgt=3
-%  
-%  Returns:
-%  -  U.pottarg:  potential at target locations, if requested, $u(t_{i})$
-%  -  U.gradtarg: gradient at target locations, if requested, $\nabla u(t_{i})$
-%  -  U.hesstarg: hessian at target locations, if requested, $\nabla^2 u(t_{i})$
+%  Kernel definitions, input and outputs arguments are identical to
+%  lfmm3d (see that function for all definitions), apart from:
+%  1) the first argument (eps) is absent.
+%  2) there are currently no outputs at sources, meaning that U.pot, U.grad,
+%     and U.hess are missing (as if pg=0). In other words,
+%     just targets for now, and targ is thus not an optional argument.
 %
 % See also: LFMM3D
 

From d84c5743a9b43813b258d642133e7b09443dce54 Mon Sep 17 00:00:00 2001
From: Travis Askham <askhamwhat@gmail.com>
Date: Tue, 8 Aug 2023 11:03:54 -0400
Subject: [PATCH 09/14] add some clarification about evaluation points for sums

---
 matlab/hfmm3d.m  |  5 +++++
 matlab/lfmm3d.m  |  5 +++++
 matlab/stfmm3d.m | 11 ++++++++---
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/matlab/hfmm3d.m b/matlab/hfmm3d.m
index ae57947d..56c2257a 100644
--- a/matlab/hfmm3d.m
+++ b/matlab/hfmm3d.m
@@ -18,6 +18,11 @@
 %  where $c_{j}$ are the charge densities
 %  $v_{j}$ are the dipole orientation vectors, and
 %  $x_{j}$ are the source locations.
+%  
+%  There are two options for the evaluation points $x$. These
+%  can be the sources themselves (pg > 0; see below) or other
+%  target points of interest (pgt > 0; see below). Both options
+%  can be selected in one call.
 %  When $x=x_{j}$, the term corresponding to $x_{j}$ is dropped
 %  from the sum.
 % 
diff --git a/matlab/lfmm3d.m b/matlab/lfmm3d.m
index e24115e6..956b210a 100644
--- a/matlab/lfmm3d.m
+++ b/matlab/lfmm3d.m
@@ -18,6 +18,11 @@
 %  where $c_{j}$ are the charge densities,
 %  $v_{j}$ are the dipole orientation vectors, and
 %  $x_{j}$ are the source locations.
+%
+%  There are two options for the evaluation points $x$. These
+%  can be the sources themselves (pg > 0; see below) or other
+%  target points of interest (pgt > 0; see below). Both options
+%  can be selected in one call.
 %  When $x=x_{j}$, the term corresponding to $x_{j}$ is dropped
 %  from the sum.
 % 
diff --git a/matlab/stfmm3d.m b/matlab/stfmm3d.m
index c59afc6f..adfd9094 100644
--- a/matlab/stfmm3d.m
+++ b/matlab/stfmm3d.m
@@ -13,14 +13,19 @@
 %       u_i(x) = sum_m G_{ij}(x,y^{(m)}) sigma^{(m)}_j
 %                + sum_m T_{ijk}(x,y^{(m)}) mu^{(m)}_j nu^{(m)}_k
 %
-%  for x at all of the target locations, and i=1,2,3,
+%  for each requested evaluation point x and i=1,2,3,
 %  where sigma^{(m)} is the Stokeslet force, mu^{(m)} is the
 %  stresslet strength, and nu^{(m)} is the stresslet orientation
 %  (note that each of these is a 3 vector per source point y^{(m)}).
 %  The stokeslet kernel G and stresslet kernel T are defined below.
 %  Repeated indices are taken as summed over 1,2,3, ie, Einstein
-%  convention. For x a source point, the self-interaction in the
-%  sum is omitted.
+%  convention.
+%  There are two options for the evaluation points $x$. These
+%  can be the sources themselves (ifppreg > 0; see below) or other
+%  target points of interest (ifppregtarg > 0; see below). Both options
+%  can be selected in one call.
+%  When $x=y_{(m)}$, the term corresponding to $y^{(m)}$ is dropped
+%  from the sum.
 %
 %  Optionally, the associated pressure p(x) and 3x3 gradient tensor
 %  grad u(x) are returned,

From d81dfaaeaa0f932b903c58832b59d76165e1906d Mon Sep 17 00:00:00 2001
From: Travis Askham <askhamwhat@gmail.com>
Date: Tue, 8 Aug 2023 11:08:28 -0400
Subject: [PATCH 10/14] small clean

---
 matlab/stfmm3d.m | 1 +
 1 file changed, 1 insertion(+)

diff --git a/matlab/stfmm3d.m b/matlab/stfmm3d.m
index adfd9094..d5e66b85 100644
--- a/matlab/stfmm3d.m
+++ b/matlab/stfmm3d.m
@@ -20,6 +20,7 @@
 %  The stokeslet kernel G and stresslet kernel T are defined below.
 %  Repeated indices are taken as summed over 1,2,3, ie, Einstein
 %  convention.
+%
 %  There are two options for the evaluation points $x$. These
 %  can be the sources themselves (ifppreg > 0; see below) or other
 %  target points of interest (ifppregtarg > 0; see below). Both options

From 281893d22c8734ea95087fcba654e343835e3c2b Mon Sep 17 00:00:00 2001
From: Travis Askham <askhamwhat@gmail.com>
Date: Tue, 8 Aug 2023 11:39:23 -0400
Subject: [PATCH 11/14] add math formulae for emfmm3d

---
 matlab/em3ddir.m |  1 -
 matlab/emfmm3d.m | 30 ++++++++++++++++++++++--------
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/matlab/em3ddir.m b/matlab/em3ddir.m
index d57a163c..aeeb8c99 100644
--- a/matlab/em3ddir.m
+++ b/matlab/em3ddir.m
@@ -4,7 +4,6 @@
 % U = em3ddir(zk,srcinfo,targ,ifE,ifcurlE,ifdivE)
 %
 %  Maxwell direct evaluation in R^3: evaluate all pairwise particle
-%  interactions (ignoring self-interactions) and
 %  interactions with targs. This is the slow O(N^2) direct code used
 %  as a reference for testing the fast code emfmm3d.
 %
diff --git a/matlab/emfmm3d.m b/matlab/emfmm3d.m
index 1d96c172..839d3953 100644
--- a/matlab/emfmm3d.m
+++ b/matlab/emfmm3d.m
@@ -3,17 +3,31 @@
 %
 %  U = emfmm3d(eps,zk,srcinfo,targ,ifE,ifcurlE,ifdivE)
 %
-%  Frequency-domain Maxwell FMM in R^3: evaluate all pairwise particle
-%  interactions (ignoring self-interactions) and
-%  interactions with targets, using the fast multipole method
-%  with precision eps.
+%  Frequency-domain Maxwell FMM in R^3: evaluate all pair-wise
+%  particle interactions with targets using the fast multipole
+%  method with precision eps.
 %
-%  This subroutine evaluates sums implied by the operator representation
+%  Specifically, this subroutine computes a sum for the electric field
 %
-%      E = curl S_{k}[h_current] + S_{k}[e_current] + grad S_{k}[e_charge]
+%      E(x) = sum_m curl G_k(x,y^{(m)}) h_current_m
+%                 + G_k(x,y^{(m)}) e_current_m 
+%                 + grad G_k(x,y^{(m)}) e_charge_m
 %
-%  by calling the vector Helmholtz FMM.
-%  With appropriate input flags, the subroutine also computes divE, curlE.
+%  for each requested evaluation point x, where h_current and e_current
+%  are 3-vector densities and e_charge is a scalar density supplied
+%  at each source point y^{(m)}. G_k is the Helmholtz Green function
+%  without the 1/(4pi) scaling:
+%
+%      G_k(x,y) = e^(ik|x-y|)/|x-y|.
+%
+%  In contrast with other FMM routines in the library, this routine
+%  has only 1 option for the evaluation points: they are specified
+%  as targets. If a target x coincides with a source point y^{(m)}
+%  that term in the sum is omitted. 
+%
+%  The electric field is, naturally, a 3-vector at each point x.
+%  With appropriate input flags, the subroutine also computes divE,
+%  curlE. 
 %
 %  Remark: the subroutine uses a stabilized representation
 %  for computing the divergence by using integration by parts

From fcebcbd1d7a69dfcee94a3ad733b3af4558a9894 Mon Sep 17 00:00:00 2001
From: Manas Rachh <mrachh@flatironinstitute.org>
Date: Tue, 8 Aug 2023 14:34:56 -0400
Subject: [PATCH 12/14] updating test file names to be consistent and fixing
 exit in legacy tests

---
 matlab/{test_emfmm3d.m => emfmm3dTest.m}                | 0
 matlab/{test_hfmm3dpart_direct.m => hfmm3dLegacyTest.m} | 3 ---
 matlab/{test_hfmm3d.m => hfmm3dTest.m}                  | 0
 matlab/{test_lfmm3dpart_direct.m => lfmm3dLegacyTest.m} | 0
 matlab/{test_lfmm3d.m => lfmm3dTest.m}                  | 0
 matlab/{stfmm3d_simpletest.m => stfmm3dPerfTest.m}      | 0
 matlab/{test_stfmm3d.m => stfmm3dTest.m}                | 0
 7 files changed, 3 deletions(-)
 rename matlab/{test_emfmm3d.m => emfmm3dTest.m} (100%)
 rename matlab/{test_hfmm3dpart_direct.m => hfmm3dLegacyTest.m} (99%)
 rename matlab/{test_hfmm3d.m => hfmm3dTest.m} (100%)
 rename matlab/{test_lfmm3dpart_direct.m => lfmm3dLegacyTest.m} (100%)
 rename matlab/{test_lfmm3d.m => lfmm3dTest.m} (100%)
 rename matlab/{stfmm3d_simpletest.m => stfmm3dPerfTest.m} (100%)
 rename matlab/{test_stfmm3d.m => stfmm3dTest.m} (100%)

diff --git a/matlab/test_emfmm3d.m b/matlab/emfmm3dTest.m
similarity index 100%
rename from matlab/test_emfmm3d.m
rename to matlab/emfmm3dTest.m
diff --git a/matlab/test_hfmm3dpart_direct.m b/matlab/hfmm3dLegacyTest.m
similarity index 99%
rename from matlab/test_hfmm3dpart_direct.m
rename to matlab/hfmm3dLegacyTest.m
index fb4d0bc9..1bdd04f7 100644
--- a/matlab/test_hfmm3dpart_direct.m
+++ b/matlab/hfmm3dLegacyTest.m
@@ -120,6 +120,3 @@
     norm(F.fldtarg,2)
 end
 %%%break;
-
-
-exit;
diff --git a/matlab/test_hfmm3d.m b/matlab/hfmm3dTest.m
similarity index 100%
rename from matlab/test_hfmm3d.m
rename to matlab/hfmm3dTest.m
diff --git a/matlab/test_lfmm3dpart_direct.m b/matlab/lfmm3dLegacyTest.m
similarity index 100%
rename from matlab/test_lfmm3dpart_direct.m
rename to matlab/lfmm3dLegacyTest.m
diff --git a/matlab/test_lfmm3d.m b/matlab/lfmm3dTest.m
similarity index 100%
rename from matlab/test_lfmm3d.m
rename to matlab/lfmm3dTest.m
diff --git a/matlab/stfmm3d_simpletest.m b/matlab/stfmm3dPerfTest.m
similarity index 100%
rename from matlab/stfmm3d_simpletest.m
rename to matlab/stfmm3dPerfTest.m
diff --git a/matlab/test_stfmm3d.m b/matlab/stfmm3dTest.m
similarity index 100%
rename from matlab/test_stfmm3d.m
rename to matlab/stfmm3dTest.m

From 9eb1ff4928c36cce0729f2db2e8fd4a63ab5ee08 Mon Sep 17 00:00:00 2001
From: Manas Rachh <mrachh@flatironinstitute.org>
Date: Tue, 8 Aug 2023 15:36:49 -0400
Subject: [PATCH 13/14] updating contents

---
 matlab/Contents.m | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/matlab/Contents.m b/matlab/Contents.m
index 93568a79..febb3f94 100644
--- a/matlab/Contents.m
+++ b/matlab/Contents.m
@@ -12,10 +12,11 @@
 %   st3ddir  - Direct (slow) 3D Stokes kernel sums (reference for STFMM3D).
 %
 % For tester driver scripts see:
-%   test_lfmm3d
-%   test_hfmm3d
-%   test_emfmm3d
-%   test_stfmm3d
+%   lfmm3dTest
+%   hfmm3dTest
+%   emfmm3dTest
+%   stfmm3dTest
+%   stfmm3dPerfTest
 %
 % For examples of use see:
 %   lfmm3d_example
@@ -24,12 +25,11 @@
 %   hfmm3d_big_example
 %   emfmm3d_example
 %   stfmm3d_example
-%   stfmm3d_simpletest
 %
 % Legacy codes/interfaces (from the Gimbutas-Greengard CMCL 2012 library):
 %   lfmm3dpart       - Laplace particle targ FMM in R^3.
 %   l3dpartdirect    - Laplace interactions in R^3, direct (slow) evaluation.
 %   hfmm3dpart       - Helmholtz particle targ FMM in R^3.
 %   h3dpartdirect    - Helmholtz interactions in R^3, direct (slow) evaluation.
-%   test_lfmm3dpart_direct - Test Laplace particle FMMs in R^3
-%   test_hfmm3dpart_direct - Test Helmholtz particle FMMs in R^3
+%   lfmm3dLegacyTest - Test Laplace particle FMMs in R^3
+%   hfmm3dLegacyTest - Test Helmholtz particle FMMs in R^3

From eef96343168d24e2b1e5bcf05adfabb89b654444 Mon Sep 17 00:00:00 2001
From: Manas Rachh <mrachh@flatironinstitute.org>
Date: Tue, 8 Aug 2023 16:00:21 -0400
Subject: [PATCH 14/14] updating docs via mw

---
 matlab/em3ddir.m |   2 +-
 matlab/emfmm3d.m |   7 ++-
 matlab/fmm3d.c   | 144 ++++++++++++++++++++++----------------------
 matlab/fmm3d.mw  | 152 ++++++++++++++++++-----------------------------
 matlab/hfmm3d.m  |   2 +-
 matlab/stfmm3d.m |   1 -
 6 files changed, 135 insertions(+), 173 deletions(-)

diff --git a/matlab/em3ddir.m b/matlab/em3ddir.m
index aeeb8c99..f101767a 100644
--- a/matlab/em3ddir.m
+++ b/matlab/em3ddir.m
@@ -4,7 +4,7 @@
 % U = em3ddir(zk,srcinfo,targ,ifE,ifcurlE,ifdivE)
 %
 %  Maxwell direct evaluation in R^3: evaluate all pairwise particle
-%  interactions with targs. This is the slow O(N^2) direct code used
+%  interactions with targets. This is the slow O(N^2) direct code used
 %  as a reference for testing the fast code emfmm3d.
 %
 %  Kernel definitions, input and outputs arguments are identical to
diff --git a/matlab/emfmm3d.m b/matlab/emfmm3d.m
index 839d3953..6bba3e2b 100644
--- a/matlab/emfmm3d.m
+++ b/matlab/emfmm3d.m
@@ -3,9 +3,10 @@
 %
 %  U = emfmm3d(eps,zk,srcinfo,targ,ifE,ifcurlE,ifdivE)
 %
-%  Frequency-domain Maxwell FMM in R^3: evaluate all pair-wise
-%  particle interactions with targets using the fast multipole
-%  method with precision eps.
+%  Frequency-domain Maxwell FMM in R^3: evaluate all pairwise particle
+%  interactions (ignoring self-interactions) and
+%  interactions with targets, using the fast multipole method
+%  with precision eps.
 %
 %  Specifically, this subroutine computes a sum for the electric field
 %
diff --git a/matlab/fmm3d.c b/matlab/fmm3d.c
index 4847b085..8d30f43e 100644
--- a/matlab/fmm3d.c
+++ b/matlab/fmm3d.c
@@ -1201,7 +1201,7 @@ MWF77_RETURN MWF77_st3ddirectstokstrsg(int*, double*, double*, int*, double*, do
 } /* end extern C */
 #endif
 
-/* ---- fmm3d.mw: 176 ----
+/* ---- fmm3d.mw: 181 ----
  * hndiv(double[1] eps, int[1] ns, int[1] nt, int[1] ifcharge, int[1] ifdipole, int[1] pg, int[1] pgt, inout int[1] ndiv, inout int[1] idivflag);
  */
 static const char* stubids1_ = "hndiv(i double[x], i int[x], i int[x], i int[x], i int[x], i int[x], i int[x], io int[x], io int[x])";
@@ -1355,7 +1355,7 @@ void mexStub1(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 191 ----
+/* ---- fmm3d.mw: 196 ----
  * hfmm3d_ndiv(int[1] nd, double[1] eps, dcomplex[1] zk, int[1] ns, double[3, ns] sources, int[1] ifcharge, dcomplex[nd, ns] charges, int[1] ifdipole, dcomplex[nd3, ns] dipoles, int[1] iper, int[1] pg, inout dcomplex[nd, ns] pot, inout dcomplex[nd3, ns] grad, inout dcomplex[nd6, ns] hess, int[1] nt, double[3, ntuse] targ, int[1] pgt, inout dcomplex[nd, ntuse] pottarg, inout dcomplex[nd3, ntuse] gradtarg, inout dcomplex[nd6, ntuse] hesstarg, int[1] ndiv, int[1] idivflag, int[1] ifnear, inout double[6] timeinfo, inout int[1] ier);
  */
 static const char* stubids2_ = "hfmm3d_ndiv(i int[x], i double[x], i dcomplex[x], i int[x], i double[xx], i int[x], i dcomplex[xx], i int[x], i dcomplex[xx], i int[x], i int[x], io dcomplex[xx], io dcomplex[xx], io dcomplex[xx], i int[x], i double[xx], i int[x], io dcomplex[xx], io dcomplex[xx], io dcomplex[xx], i int[x], i int[x], i int[x], io double[x], io int[x])";
@@ -1820,7 +1820,7 @@ void mexStub2(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 307 ----
+/* ---- fmm3d.mw: 278 ----
  * h3ddirectcp(int[1] nd, dcomplex[1] zk, double[3, ns] sources, dcomplex[nd, ns] charges, int[1] ns, double[3, nt] targ, int[1] nt, inout dcomplex[nd, nt] pottarg, double[1] thresh);
  */
 static const char* stubids3_ = "h3ddirectcp(i int[x], i dcomplex[x], i double[xx], i dcomplex[xx], i int[x], i double[xx], i int[x], io dcomplex[xx], i double[x])";
@@ -2005,7 +2005,7 @@ void mexStub3(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 310 ----
+/* ---- fmm3d.mw: 281 ----
  * h3ddirectdp(int[1] nd, dcomplex[1] zk, double[3, ns] sources, dcomplex[nd3, ns] dipoles, int[1] ns, double[3, nt] targ, int[1] nt, inout dcomplex[nd, nt] pottarg, double[1] thresh);
  */
 static const char* stubids4_ = "h3ddirectdp(i int[x], i dcomplex[x], i double[xx], i dcomplex[xx], i int[x], i double[xx], i int[x], io dcomplex[xx], i double[x])";
@@ -2190,7 +2190,7 @@ void mexStub4(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 313 ----
+/* ---- fmm3d.mw: 284 ----
  * h3ddirectcdp(int[1] nd, dcomplex[1] zk, double[3, ns] sources, dcomplex[nd, ns] charges, dcomplex[nd3, ns] dipoles, int[1] ns, double[3, nt] targ, int[1] nt, inout dcomplex[nd, nt] pottarg, double[1] thresh);
  */
 static const char* stubids5_ = "h3ddirectcdp(i int[x], i dcomplex[x], i double[xx], i dcomplex[xx], i dcomplex[xx], i int[x], i double[xx], i int[x], io dcomplex[xx], i double[x])";
@@ -2396,7 +2396,7 @@ void mexStub5(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 319 ----
+/* ---- fmm3d.mw: 290 ----
  * h3ddirectcg(int[1] nd, dcomplex[1] zk, double[3, ns] sources, dcomplex[nd, ns] charges, int[1] ns, double[3, nt] targ, int[1] nt, inout dcomplex[nd, nt] pottarg, inout dcomplex[nd3, nt] gradtarg, double[1] thresh);
  */
 static const char* stubids6_ = "h3ddirectcg(i int[x], i dcomplex[x], i double[xx], i dcomplex[xx], i int[x], i double[xx], i int[x], io dcomplex[xx], io dcomplex[xx], i double[x])";
@@ -2604,7 +2604,7 @@ void mexStub6(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 322 ----
+/* ---- fmm3d.mw: 293 ----
  * h3ddirectdg(int[1] nd, dcomplex[1] zk, double[3, ns] sources, dcomplex[nd3, ns] dipoles, int[1] ns, double[3, nt] targ, int[1] nt, inout dcomplex[nd, nt] pottarg, inout dcomplex[nd3, nt] gradtarg, double[1] thresh);
  */
 static const char* stubids7_ = "h3ddirectdg(i int[x], i dcomplex[x], i double[xx], i dcomplex[xx], i int[x], i double[xx], i int[x], io dcomplex[xx], io dcomplex[xx], i double[x])";
@@ -2812,7 +2812,7 @@ void mexStub7(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 325 ----
+/* ---- fmm3d.mw: 296 ----
  * h3ddirectcdg(int[1] nd, dcomplex[1] zk, double[3, ns] sources, dcomplex[nd, ns] charges, dcomplex[nd3, ns] dipoles, int[1] ns, double[3, nt] targ, int[1] nt, inout dcomplex[nd, nt] pottarg, inout dcomplex[nd3, nt] gradtarg, double[1] thresh);
  */
 static const char* stubids8_ = "h3ddirectcdg(i int[x], i dcomplex[x], i double[xx], i dcomplex[xx], i dcomplex[xx], i int[x], i double[xx], i int[x], io dcomplex[xx], io dcomplex[xx], i double[x])";
@@ -3041,7 +3041,7 @@ void mexStub8(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 509 ----
+/* ---- fmm3d.mw: 485 ----
  * lndiv(double[1] eps, int[1] ns, int[1] nt, int[1] ifcharge, int[1] ifdipole, int[1] pg, int[1] pgt, inout int[1] ndiv, inout int[1] idivflag);
  */
 static const char* stubids9_ = "lndiv(i double[x], i int[x], i int[x], i int[x], i int[x], i int[x], i int[x], io int[x], io int[x])";
@@ -3195,7 +3195,7 @@ void mexStub9(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 524 ----
+/* ---- fmm3d.mw: 500 ----
  * lfmm3d_ndiv(int[1] nd, double[1] eps, int[1] ns, double[3, ns] sources, int[1] ifcharge, double[nd, ns] charges, int[1] ifdipole, double[nd3, ns] dipoles, int[1] iper, int[1] pg, inout double[nd, ns] pot, inout double[nd3, ns] grad, inout double[nd6, ns] hess, int[1] nt, double[3, ntuse] targ, int[1] pgt, inout double[nd, ntuse] pottarg, inout double[nd3, ntuse] gradtarg, inout double[nd6, ntuse] hesstarg, int[1] ndiv, int[1] idivflag, int[1] ifnear, inout double[6] timeinfo, inout int[1] ier);
  */
 static const char* stubids10_ = "lfmm3d_ndiv(i int[x], i double[x], i int[x], i double[xx], i int[x], i double[xx], i int[x], i double[xx], i int[x], i int[x], io double[xx], io double[xx], io double[xx], i int[x], i double[xx], i int[x], io double[xx], io double[xx], io double[xx], i int[x], i int[x], i int[x], io double[x], io int[x])";
@@ -3627,7 +3627,7 @@ void mexStub10(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 644 ----
+/* ---- fmm3d.mw: 587 ----
  * l3ddirectcp(int[1] nd, double[3, ns] sources, double[nd, ns] charges, int[1] ns, double[3, nt] targ, int[1] nt, inout double[nd, nt] pottarg, double[1] thresh);
  */
 static const char* stubids11_ = "l3ddirectcp(i int[x], i double[xx], i double[xx], i int[x], i double[xx], i int[x], io double[xx], i double[x])";
@@ -3793,7 +3793,7 @@ void mexStub11(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 647 ----
+/* ---- fmm3d.mw: 590 ----
  * l3ddirectdp(int[1] nd, double[3, ns] sources, double[nd3, ns] dipoles, int[1] ns, double[3, nt] targ, int[1] nt, inout double[nd, nt] pottarg, double[1] thresh);
  */
 static const char* stubids12_ = "l3ddirectdp(i int[x], i double[xx], i double[xx], i int[x], i double[xx], i int[x], io double[xx], i double[x])";
@@ -3959,7 +3959,7 @@ void mexStub12(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 650 ----
+/* ---- fmm3d.mw: 593 ----
  * l3ddirectcdp(int[1] nd, double[3, ns] sources, double[nd, ns] charges, double[nd3, ns] dipoles, int[1] ns, double[3, nt] targ, int[1] nt, inout double[nd, nt] pottarg, double[1] thresh);
  */
 static const char* stubids13_ = "l3ddirectcdp(i int[x], i double[xx], i double[xx], i double[xx], i int[x], i double[xx], i int[x], io double[xx], i double[x])";
@@ -4147,7 +4147,7 @@ void mexStub13(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 656 ----
+/* ---- fmm3d.mw: 599 ----
  * l3ddirectcg(int[1] nd, double[3, ns] sources, double[nd, ns] charges, int[1] ns, double[3, nt] targ, int[1] nt, inout double[nd, nt] pottarg, inout double[nd3, nt] gradtarg, double[1] thresh);
  */
 static const char* stubids14_ = "l3ddirectcg(i int[x], i double[xx], i double[xx], i int[x], i double[xx], i int[x], io double[xx], io double[xx], i double[x])";
@@ -4333,7 +4333,7 @@ void mexStub14(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 659 ----
+/* ---- fmm3d.mw: 602 ----
  * l3ddirectdg(int[1] nd, double[3, ns] sources, double[nd3, ns] dipoles, int[1] ns, double[3, nt] targ, int[1] nt, inout double[nd, nt] pottarg, inout double[nd3, nt] gradtarg, double[1] thresh);
  */
 static const char* stubids15_ = "l3ddirectdg(i int[x], i double[xx], i double[xx], i int[x], i double[xx], i int[x], io double[xx], io double[xx], i double[x])";
@@ -4519,7 +4519,7 @@ void mexStub15(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 662 ----
+/* ---- fmm3d.mw: 605 ----
  * l3ddirectcdg(int[1] nd, double[3, ns] sources, double[nd, ns] charges, double[nd3, ns] dipoles, int[1] ns, double[3, nt] targ, int[1] nt, inout double[nd, nt] pottarg, inout double[nd3, nt] gradtarg, double[1] thresh);
  */
 static const char* stubids16_ = "l3ddirectcdg(i int[x], i double[xx], i double[xx], i double[xx], i int[x], i double[xx], i int[x], io double[xx], io double[xx], i double[x])";
@@ -4727,7 +4727,7 @@ void mexStub16(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 669 ----
+/* ---- fmm3d.mw: 612 ----
  * l3ddirectch(int[1] nd, double[3, ns] sources, double[nd, ns] charges, int[1] ns, double[3, nt] targ, int[1] nt, inout double[nd, nt] pottarg, inout double[nd3, nt] gradtarg, inout double[nd6, nt] hesstarg, double[1] thresh);
  */
 static const char* stubids17_ = "l3ddirectch(i int[x], i double[xx], i double[xx], i int[x], i double[xx], i int[x], io double[xx], io double[xx], io double[xx], i double[x])";
@@ -4933,7 +4933,7 @@ void mexStub17(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 672 ----
+/* ---- fmm3d.mw: 615 ----
  * l3ddirectdh(int[1] nd, double[3, ns] sources, double[nd3, ns] dipoles, int[1] ns, double[3, nt] targ, int[1] nt, inout double[nd, nt] pottarg, inout double[nd3, nt] gradtarg, inout double[nd6, nt] hesstarg, double[1] thresh);
  */
 static const char* stubids18_ = "l3ddirectdh(i int[x], i double[xx], i double[xx], i int[x], i double[xx], i int[x], io double[xx], io double[xx], io double[xx], i double[x])";
@@ -5139,7 +5139,7 @@ void mexStub18(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 675 ----
+/* ---- fmm3d.mw: 618 ----
  * l3ddirectcdh(int[1] nd, double[3, ns] sources, double[nd, ns] charges, double[nd3, ns] dipoles, int[1] ns, double[3, nt] targ, int[1] nt, inout double[nd, nt] pottarg, inout double[nd3, nt] gradtarg, inout double[nd6, nt] hesstarg, double[1] thresh);
  */
 static const char* stubids19_ = "l3ddirectcdh(i int[x], i double[xx], i double[xx], i double[xx], i int[x], i double[xx], i int[x], io double[xx], io double[xx], io double[xx], i double[x])";
@@ -5367,7 +5367,7 @@ void mexStub19(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 822 ----
+/* ---- fmm3d.mw: 780 ----
  * emfmm3d(int[1] nd, double[1] eps, dcomplex[1] zk, int[1] ns, double[3, ns] sources, int[1] ifh_current, dcomplex[nd3, ns_h_current] h_current, int[1] ife_current, dcomplex[nd3, ns_e_current] e_current, int[1] ife_charge, dcomplex[nd, ns_e_charge] e_charge, int[1] nt, double[3, nt] targ, int[1] ifE, inout dcomplex[nd3, nt_E] E, int[1] ifcurlE, inout dcomplex[nd3, nt_curlE] curlE, int[1] ifdivE, inout dcomplex[nd, nt_divE] divE, inout int[1] ier);
  */
 static const char* stubids20_ = "emfmm3d(i int[x], i double[x], i dcomplex[x], i int[x], i double[xx], i int[x], i dcomplex[xx], i int[x], i dcomplex[xx], i int[x], i dcomplex[xx], i int[x], i double[xx], i int[x], io dcomplex[xx], i int[x], io dcomplex[xx], i int[x], io dcomplex[xx], io int[x])";
@@ -5740,7 +5740,7 @@ void mexStub20(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 930 ----
+/* ---- fmm3d.mw: 887 ----
  * em3ddirect(int[1] nd, dcomplex[1] zk, int[1] ns, double[3, ns] sources, int[1] ifh_current, dcomplex[nd3, ns_h_current] h_current, int[1] ife_current, dcomplex[nd3, ns_e_current] e_current, int[1] ife_charge, dcomplex[nd, ns_e_charge] e_charge, int[1] nt, double[3, nt] targ, int[1] ifE, inout dcomplex[nd3, nt_E] E, int[1] ifcurlE, inout dcomplex[nd3, nt_curlE] curlE, int[1] ifdivE, inout dcomplex[nd, nt_divE] divE, double[1] thresh);
  */
 static const char* stubids21_ = "em3ddirect(i int[x], i dcomplex[x], i int[x], i double[xx], i int[x], i dcomplex[xx], i int[x], i dcomplex[xx], i int[x], i dcomplex[xx], i int[x], i double[xx], i int[x], io dcomplex[xx], i int[x], io dcomplex[xx], i int[x], io dcomplex[xx], i double[x])";
@@ -6097,7 +6097,7 @@ void mexStub21(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 1120 ----
+/* ---- fmm3d.mw: 1082 ----
  * stfmm3d(int[1] nd, double[1] eps, int[1] ns, double[3, ns] sources, int[1] ifstoklet, double[nd3, ns_stok] stoklet, int[1] ifstrslet, double[nd3, ns_strs] strslet, double[nd3, ns_strs] strsvec, int[1] ifppreg, inout double[nd3, ns_pot] pot, inout double[nd, ns_pre] pre, inout double[nd9, ns_grad] grad, int[1] nt, double[3, nt] targ, int[1] ifppregtarg, inout double[nd3, nt_pot] pottarg, inout double[nd, nt_pre] pretarg, inout double[nd9, nt_grad] gradtarg, inout int[1] ier);
  */
 static const char* stubids22_ = "stfmm3d(i int[x], i double[x], i int[x], i double[xx], i int[x], i double[xx], i int[x], i double[xx], i double[xx], i int[x], io double[xx], io double[xx], io double[xx], i int[x], i double[xx], i int[x], io double[xx], io double[xx], io double[xx], io int[x])";
@@ -6479,7 +6479,7 @@ void mexStub22(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 1217 ----
+/* ---- fmm3d.mw: 1179 ----
  * st3ddirectstokg(int[1] nd, double[3, ns] sources, double[nd3, ns_stok] stoklet, int[1] ns, double[3, nt] targ, int[1] nt, inout double[nd3, nt] pottarg, inout double[nd, nt] pretarg, inout double[nd9, nt] gradtarg, double[1] thresh);
  */
 static const char* stubids23_ = "st3ddirectstokg(i int[x], i double[xx], i double[xx], i int[x], i double[xx], i int[x], io double[xx], io double[xx], io double[xx], i double[x])";
@@ -6685,7 +6685,7 @@ void mexStub23(int nlhs, mxArray* plhs[],
         mexErrMsgTxt(mw_err_txt_);
 }
 
-/* ---- fmm3d.mw: 1220 ----
+/* ---- fmm3d.mw: 1182 ----
  * st3ddirectstokstrsg(int[1] nd, double[3, ns] sources, double[nd3, ns_stok] stoklet, int[1] istress, double[nd3, ns_strs] strslet, double[nd3, ns_strs] strsvec, int[1] ns, double[3, nt] targ, int[1] nt, inout double[nd3, nt] pottarg, inout double[nd, nt] pretarg, inout double[nd9, nt] gradtarg, double[1] thresh);
  */
 static const char* stubids24_ = "st3ddirectstokstrsg(i int[x], i double[xx], i double[xx], i int[x], i double[xx], i double[xx], i int[x], i double[xx], i int[x], io double[xx], io double[xx], io double[xx], i double[x])";
@@ -7025,30 +7025,30 @@ void mexFunction(int nlhs, mxArray* plhs[],
     } else if (strcmp(id, "*profile report*") == 0) {
         if (!mexprofrecord_)
             mexPrintf("Profiler inactive\n");
-        mexPrintf("%d calls to fmm3d.mw:176\n", mexprofrecord_[1]);
-        mexPrintf("%d calls to fmm3d.mw:191\n", mexprofrecord_[2]);
-        mexPrintf("%d calls to fmm3d.mw:307\n", mexprofrecord_[3]);
-        mexPrintf("%d calls to fmm3d.mw:310\n", mexprofrecord_[4]);
-        mexPrintf("%d calls to fmm3d.mw:313\n", mexprofrecord_[5]);
-        mexPrintf("%d calls to fmm3d.mw:319\n", mexprofrecord_[6]);
-        mexPrintf("%d calls to fmm3d.mw:322\n", mexprofrecord_[7]);
-        mexPrintf("%d calls to fmm3d.mw:325\n", mexprofrecord_[8]);
-        mexPrintf("%d calls to fmm3d.mw:509\n", mexprofrecord_[9]);
-        mexPrintf("%d calls to fmm3d.mw:524\n", mexprofrecord_[10]);
-        mexPrintf("%d calls to fmm3d.mw:644\n", mexprofrecord_[11]);
-        mexPrintf("%d calls to fmm3d.mw:647\n", mexprofrecord_[12]);
-        mexPrintf("%d calls to fmm3d.mw:650\n", mexprofrecord_[13]);
-        mexPrintf("%d calls to fmm3d.mw:656\n", mexprofrecord_[14]);
-        mexPrintf("%d calls to fmm3d.mw:659\n", mexprofrecord_[15]);
-        mexPrintf("%d calls to fmm3d.mw:662\n", mexprofrecord_[16]);
-        mexPrintf("%d calls to fmm3d.mw:669\n", mexprofrecord_[17]);
-        mexPrintf("%d calls to fmm3d.mw:672\n", mexprofrecord_[18]);
-        mexPrintf("%d calls to fmm3d.mw:675\n", mexprofrecord_[19]);
-        mexPrintf("%d calls to fmm3d.mw:822\n", mexprofrecord_[20]);
-        mexPrintf("%d calls to fmm3d.mw:930\n", mexprofrecord_[21]);
-        mexPrintf("%d calls to fmm3d.mw:1120\n", mexprofrecord_[22]);
-        mexPrintf("%d calls to fmm3d.mw:1217\n", mexprofrecord_[23]);
-        mexPrintf("%d calls to fmm3d.mw:1220\n", mexprofrecord_[24]);
+        mexPrintf("%d calls to fmm3d.mw:181\n", mexprofrecord_[1]);
+        mexPrintf("%d calls to fmm3d.mw:196\n", mexprofrecord_[2]);
+        mexPrintf("%d calls to fmm3d.mw:278\n", mexprofrecord_[3]);
+        mexPrintf("%d calls to fmm3d.mw:281\n", mexprofrecord_[4]);
+        mexPrintf("%d calls to fmm3d.mw:284\n", mexprofrecord_[5]);
+        mexPrintf("%d calls to fmm3d.mw:290\n", mexprofrecord_[6]);
+        mexPrintf("%d calls to fmm3d.mw:293\n", mexprofrecord_[7]);
+        mexPrintf("%d calls to fmm3d.mw:296\n", mexprofrecord_[8]);
+        mexPrintf("%d calls to fmm3d.mw:485\n", mexprofrecord_[9]);
+        mexPrintf("%d calls to fmm3d.mw:500\n", mexprofrecord_[10]);
+        mexPrintf("%d calls to fmm3d.mw:587\n", mexprofrecord_[11]);
+        mexPrintf("%d calls to fmm3d.mw:590\n", mexprofrecord_[12]);
+        mexPrintf("%d calls to fmm3d.mw:593\n", mexprofrecord_[13]);
+        mexPrintf("%d calls to fmm3d.mw:599\n", mexprofrecord_[14]);
+        mexPrintf("%d calls to fmm3d.mw:602\n", mexprofrecord_[15]);
+        mexPrintf("%d calls to fmm3d.mw:605\n", mexprofrecord_[16]);
+        mexPrintf("%d calls to fmm3d.mw:612\n", mexprofrecord_[17]);
+        mexPrintf("%d calls to fmm3d.mw:615\n", mexprofrecord_[18]);
+        mexPrintf("%d calls to fmm3d.mw:618\n", mexprofrecord_[19]);
+        mexPrintf("%d calls to fmm3d.mw:780\n", mexprofrecord_[20]);
+        mexPrintf("%d calls to fmm3d.mw:887\n", mexprofrecord_[21]);
+        mexPrintf("%d calls to fmm3d.mw:1082\n", mexprofrecord_[22]);
+        mexPrintf("%d calls to fmm3d.mw:1179\n", mexprofrecord_[23]);
+        mexPrintf("%d calls to fmm3d.mw:1182\n", mexprofrecord_[24]);
     } else if (strcmp(id, "*profile log*") == 0) {
         FILE* logfp;
         if (nrhs != 2 || mxGetString(prhs[1], id, sizeof(id)) != 0)
@@ -7058,30 +7058,30 @@ void mexFunction(int nlhs, mxArray* plhs[],
             mexErrMsgTxt("Cannot open log for output");
         if (!mexprofrecord_)
             fprintf(logfp, "Profiler inactive\n");
-        fprintf(logfp, "%d calls to fmm3d.mw:176\n", mexprofrecord_[1]);
-        fprintf(logfp, "%d calls to fmm3d.mw:191\n", mexprofrecord_[2]);
-        fprintf(logfp, "%d calls to fmm3d.mw:307\n", mexprofrecord_[3]);
-        fprintf(logfp, "%d calls to fmm3d.mw:310\n", mexprofrecord_[4]);
-        fprintf(logfp, "%d calls to fmm3d.mw:313\n", mexprofrecord_[5]);
-        fprintf(logfp, "%d calls to fmm3d.mw:319\n", mexprofrecord_[6]);
-        fprintf(logfp, "%d calls to fmm3d.mw:322\n", mexprofrecord_[7]);
-        fprintf(logfp, "%d calls to fmm3d.mw:325\n", mexprofrecord_[8]);
-        fprintf(logfp, "%d calls to fmm3d.mw:509\n", mexprofrecord_[9]);
-        fprintf(logfp, "%d calls to fmm3d.mw:524\n", mexprofrecord_[10]);
-        fprintf(logfp, "%d calls to fmm3d.mw:644\n", mexprofrecord_[11]);
-        fprintf(logfp, "%d calls to fmm3d.mw:647\n", mexprofrecord_[12]);
-        fprintf(logfp, "%d calls to fmm3d.mw:650\n", mexprofrecord_[13]);
-        fprintf(logfp, "%d calls to fmm3d.mw:656\n", mexprofrecord_[14]);
-        fprintf(logfp, "%d calls to fmm3d.mw:659\n", mexprofrecord_[15]);
-        fprintf(logfp, "%d calls to fmm3d.mw:662\n", mexprofrecord_[16]);
-        fprintf(logfp, "%d calls to fmm3d.mw:669\n", mexprofrecord_[17]);
-        fprintf(logfp, "%d calls to fmm3d.mw:672\n", mexprofrecord_[18]);
-        fprintf(logfp, "%d calls to fmm3d.mw:675\n", mexprofrecord_[19]);
-        fprintf(logfp, "%d calls to fmm3d.mw:822\n", mexprofrecord_[20]);
-        fprintf(logfp, "%d calls to fmm3d.mw:930\n", mexprofrecord_[21]);
-        fprintf(logfp, "%d calls to fmm3d.mw:1120\n", mexprofrecord_[22]);
-        fprintf(logfp, "%d calls to fmm3d.mw:1217\n", mexprofrecord_[23]);
-        fprintf(logfp, "%d calls to fmm3d.mw:1220\n", mexprofrecord_[24]);
+        fprintf(logfp, "%d calls to fmm3d.mw:181\n", mexprofrecord_[1]);
+        fprintf(logfp, "%d calls to fmm3d.mw:196\n", mexprofrecord_[2]);
+        fprintf(logfp, "%d calls to fmm3d.mw:278\n", mexprofrecord_[3]);
+        fprintf(logfp, "%d calls to fmm3d.mw:281\n", mexprofrecord_[4]);
+        fprintf(logfp, "%d calls to fmm3d.mw:284\n", mexprofrecord_[5]);
+        fprintf(logfp, "%d calls to fmm3d.mw:290\n", mexprofrecord_[6]);
+        fprintf(logfp, "%d calls to fmm3d.mw:293\n", mexprofrecord_[7]);
+        fprintf(logfp, "%d calls to fmm3d.mw:296\n", mexprofrecord_[8]);
+        fprintf(logfp, "%d calls to fmm3d.mw:485\n", mexprofrecord_[9]);
+        fprintf(logfp, "%d calls to fmm3d.mw:500\n", mexprofrecord_[10]);
+        fprintf(logfp, "%d calls to fmm3d.mw:587\n", mexprofrecord_[11]);
+        fprintf(logfp, "%d calls to fmm3d.mw:590\n", mexprofrecord_[12]);
+        fprintf(logfp, "%d calls to fmm3d.mw:593\n", mexprofrecord_[13]);
+        fprintf(logfp, "%d calls to fmm3d.mw:599\n", mexprofrecord_[14]);
+        fprintf(logfp, "%d calls to fmm3d.mw:602\n", mexprofrecord_[15]);
+        fprintf(logfp, "%d calls to fmm3d.mw:605\n", mexprofrecord_[16]);
+        fprintf(logfp, "%d calls to fmm3d.mw:612\n", mexprofrecord_[17]);
+        fprintf(logfp, "%d calls to fmm3d.mw:615\n", mexprofrecord_[18]);
+        fprintf(logfp, "%d calls to fmm3d.mw:618\n", mexprofrecord_[19]);
+        fprintf(logfp, "%d calls to fmm3d.mw:780\n", mexprofrecord_[20]);
+        fprintf(logfp, "%d calls to fmm3d.mw:887\n", mexprofrecord_[21]);
+        fprintf(logfp, "%d calls to fmm3d.mw:1082\n", mexprofrecord_[22]);
+        fprintf(logfp, "%d calls to fmm3d.mw:1179\n", mexprofrecord_[23]);
+        fprintf(logfp, "%d calls to fmm3d.mw:1182\n", mexprofrecord_[24]);
         fclose(logfp);
     } else
         mexErrMsgTxt("Unknown identifier");
diff --git a/matlab/fmm3d.mw b/matlab/fmm3d.mw
index ee61da5a..5c499e6b 100644
--- a/matlab/fmm3d.mw
+++ b/matlab/fmm3d.mw
@@ -19,6 +19,11 @@
 %  where $c_{j}$ are the charge densities
 %  $v_{j}$ are the dipole orientation vectors, and
 %  $x_{j}$ are the source locations.
+%
+%  There are two options for the evaluation points $x$. These
+%  can be the sources themselves (pg > 0; see below) or other
+%  target points of interest (pgt > 0; see below). Both options
+%  can be selected in one call.
 %  When $x=x_{j}$, the term corresponding to $x_{j}$ is dropped
 %  from the sum.
 % 
@@ -211,50 +216,16 @@ end
 %
 %  U = h3ddir(zk,srcinfo,targ,pgt)
 %
-%  This subroutine computes the N-body Helmholtz
-%  interactions and its gradients in three dimensions, where 
-%  the interaction kernel is given by $e^{ikr}/r$,
-% 
-%    u(x) = \sum_{j=1}^{N} c_{j} \frac{e^{ik\|x-x_{j}\|}}{\|x-x_{j}\|} - 
-%      v_{j} \cdot \nabla \left( \frac{e^{ik\|x-x_{j}\|}}{\|x-x_{j}\|}\right)   
-%
-%  where $c_{j}$ are the charge densities
-%  $v_{j}$ are the dipole orientation vectors, and
-%  $x_{j}$ are the source locations.
-%  When $x=x_{j}$, the term corresponding to $x_{j}$ is dropped
-%  from the sum.
-%  
-%  The sum is evaluated directly (slow code for testing).
-%  Note: currently no self-interactions, just sum at targets.
-%
-%  Args:
+%  Helmholtz direct evaluation in R^3: evaluate all pairwise particle
+%  interactions with targets. This is the slow O(N^2) direct code used
+%  as a reference for testing the (fast) code hfmm3d.
 %
-%  -  zk: complex
-%        Helmholtz parameter, k
-%  -  srcinfo: structure
-%        structure containing the following info about the sources:     
-%     *  srcinfo.sources: double(3,n)    
-%           source locations, $x_{j}$
-%     *  srcinfo.nd: integer
-%           number of charge/dipole vectors (optional, 
-%           default - nd = 1)
-%     *  srcinfo.charges: complex(nd,n) 
-%           charge densities, $c_{j}$ (optional, 
-%           default - term corresponding to charges dropped)
-%     *  srcinfo.dipoles: complex(nd,3,n) 
-%           dipole orientation vectors, $v_{j}$ (optional
-%           default - term corresponding to dipoles dropped) 
-%  -  targ: double(3,nt)
-%        target locations, $t_{i}$ 
-%  -  pgt: integer
-%        | target eval flag 
-%        | potential at targets evaluated if pgt = 1
-%        | potential and gradient at targets evaluated if pgt=2  
-%  
-%  Returns:
-%  
-%  -  U.pottarg: potential at target locations, if requested, $u(t_{i})$
-%  -  U.gradtarg: gradient at target locations, if requested, $\nabla u(t_{i})$
+%  Kernel definitions, input and outputs arguments are identical to
+%  hfmm3d (see that function for all definitions), apart from:
+%  1) the first argument (eps) is absent.
+%  2) there are currently no outputs at sources, meaning that U.pot
+%     and U.grad are missing (as if pg=0). In other words,
+%     just targets for now, and targ is thus not an optional argument.
 %
 % See also: HFMM3D
 
@@ -350,6 +321,11 @@ end
 %  where $c_{j}$ are the charge densities,
 %  $v_{j}$ are the dipole orientation vectors, and
 %  $x_{j}$ are the source locations.
+%
+%  There are two options for the evaluation points $x$. These
+%  can be the sources themselves (pg > 0; see below) or other
+%  target points of interest (pgt > 0; see below). Both options
+%  can be selected in one call.
 %  When $x=x_{j}$, the term corresponding to $x_{j}$ is dropped
 %  from the sum.
 % 
@@ -546,49 +522,16 @@ end
 %
 % U = l3ddir(srcinfo,targ,pgt)
 %
-%  This subroutine computes the N-body Laplace
-%  interactions and its gradients in three dimensions where 
-%  the interaction kernel is given by $1/r$, namely
-% 
-%    u(x) = \sum_{j=1}^{N} c_{j} \frac{1}{\|x-x_{j}\|} - 
-%      v_{j} \cdot \nabla \left( \frac{1}{\|x-x_{j}\|}\right)   
-%
-%  where $c_{j}$ are the charge densities,
-%  $v_{j}$ are the dipole orientation vectors, and
-%  $x_{j}$ are the source locations.
-%  When $x=x_{j}$, the term corresponding to $x_{j}$ is dropped
-%  from the sum.
-% 
-%  The sum is evaluated directly (slow code for testing).
-%  Note: currently no self-interactions, just sum at targets.
-%
-%  Args:
+%  Laplace direct evaluation in R^3: evaluate all pairwise particle
+%  interactions with targets. This is the slow O(N^2) direct code used
+%  as a reference for testing the (fast) code lfmm3d.
 %
-%  -  srcinfo: structure
-%        structure containing the following info about the sources:
-%     *  srcinfo.sources: double(3,n)    
-%           source locations, $x_{j}$
-%     *  srcinfo.nd: integer
-%           number of charge/dipole vectors (optional, 
-%           default - nd = 1)
-%     *  srcinfo.charges: double(nd,n) 
-%           charge densities, $c_{j}$ (optional, 
-%           default - term corresponding to charges dropped)
-%     *  srcinfo.dipoles: double(nd,3,n) 
-%           dipole orientation vectors, $v_{j}$ (optional
-%           default - term corresponding to dipoles dropped) 
-%  -  targ: double(3,nt)
-%        target locations, $t_{i}$ (optional)
-%  -  pgt: integer
-%        | target eval flag (optional)
-%        | potential at targets evaluated if pgt = 1
-%        | potential and gradient at targets evaluated if pgt=2 
-%        | potential, gradient and hessian at targets evaluated if pgt=3
-%  
-%  Returns:
-%  -  U.pottarg:  potential at target locations, if requested, $u(t_{i})$
-%  -  U.gradtarg: gradient at target locations, if requested, $\nabla u(t_{i})$
-%  -  U.hesstarg: hessian at target locations, if requested, $\nabla^2 u(t_{i})$
+%  Kernel definitions, input and outputs arguments are identical to
+%  lfmm3d (see that function for all definitions), apart from:
+%  1) the first argument (eps) is absent.
+%  2) there are currently no outputs at sources, meaning that U.pot, U.grad,
+%     and U.hess are missing (as if pg=0). In other words,
+%     just targets for now, and targ is thus not an optional argument.
 %
 % See also: LFMM3D
 
@@ -691,12 +634,27 @@ end
 %  interactions with targets, using the fast multipole method
 %  with precision eps.
 %
-%  This subroutine evaluates sums implied by the operator representation
+%  Specifically, this subroutine computes a sum for the electric field
+%
+%      E(x) = sum_m curl G_k(x,y^{(m)}) h_current_m
+%                 + G_k(x,y^{(m)}) e_current_m 
+%                 + grad G_k(x,y^{(m)}) e_charge_m
+%
+%  for each requested evaluation point x, where h_current and e_current
+%  are 3-vector densities and e_charge is a scalar density supplied
+%  at each source point y^{(m)}. G_k is the Helmholtz Green function
+%  without the 1/(4pi) scaling:
 %
-%      E = curl S_{k}[h_current] + S_{k}[e_current] + grad S_{k}[e_charge]
+%      G_k(x,y) = e^(ik|x-y|)/|x-y|.
 %
-%  by calling the vector Helmholtz FMM.
-%  With appropriate input flags, the subroutine also computes divE, curlE.
+%  In contrast with other FMM routines in the library, this routine
+%  has only 1 option for the evaluation points: they are specified
+%  as targets. If a target x coincides with a source point y^{(m)}
+%  that term in the sum is omitted. 
+%
+%  The electric field is, naturally, a 3-vector at each point x.
+%  With appropriate input flags, the subroutine also computes divE,
+%  curlE. 
 %
 %  Remark: the subroutine uses a stabilized representation
 %  for computing the divergence by using integration by parts
@@ -839,8 +797,7 @@ end
 % U = em3ddir(zk,srcinfo,targ,ifE,ifcurlE,ifdivE)
 %
 %  Maxwell direct evaluation in R^3: evaluate all pairwise particle
-%  interactions (ignoring self-interactions) and
-%  interactions with targs. This is the slow O(N^2) direct code used
+%  interactions with targets. This is the slow O(N^2) direct code used
 %  as a reference for testing the fast code emfmm3d.
 %
 %  Kernel definitions, input and outputs arguments are identical to
@@ -957,14 +914,20 @@ end
 %       u_i(x) = sum_m G_{ij}(x,y^{(m)}) sigma^{(m)}_j
 %                + sum_m T_{ijk}(x,y^{(m)}) mu^{(m)}_j nu^{(m)}_k
 %
-%  for x at all of the target locations, and i=1,2,3,
+%  for each requested evaluation point x and i=1,2,3,
 %  where sigma^{(m)} is the Stokeslet force, mu^{(m)} is the
 %  stresslet strength, and nu^{(m)} is the stresslet orientation
 %  (note that each of these is a 3 vector per source point y^{(m)}).
 %  The stokeslet kernel G and stresslet kernel T are defined below.
 %  Repeated indices are taken as summed over 1,2,3, ie, Einstein
-%  convention. For x a source point, the self-interaction in the
-%  sum is omitted.
+%  convention.
+%
+%  There are two options for the evaluation points $x$. These
+%  can be the sources themselves (ifppreg > 0; see below) or other
+%  target points of interest (ifppregtarg > 0; see below). Both options
+%  can be selected in one call.
+%  When $x=y_{(m)}$, the term corresponding to $y^{(m)}$ is dropped
+%  from the sum.
 %
 %  Optionally, the associated pressure p(x) and 3x3 gradient tensor
 %  grad u(x) are returned,
@@ -999,7 +962,6 @@ end
 %   
 %     T_{ijk}(x,y) = -3 r_i r_j r_k/ r^5
 %     PI_{jk}(x,y) = -2 delta_{jk}/r^3 + 6 r_j r_k/r^5      
-%
 % 
 %  Args:
 %
diff --git a/matlab/hfmm3d.m b/matlab/hfmm3d.m
index 56c2257a..73b48b21 100644
--- a/matlab/hfmm3d.m
+++ b/matlab/hfmm3d.m
@@ -18,7 +18,7 @@
 %  where $c_{j}$ are the charge densities
 %  $v_{j}$ are the dipole orientation vectors, and
 %  $x_{j}$ are the source locations.
-%  
+%
 %  There are two options for the evaluation points $x$. These
 %  can be the sources themselves (pg > 0; see below) or other
 %  target points of interest (pgt > 0; see below). Both options
diff --git a/matlab/stfmm3d.m b/matlab/stfmm3d.m
index d5e66b85..a6889c28 100644
--- a/matlab/stfmm3d.m
+++ b/matlab/stfmm3d.m
@@ -61,7 +61,6 @@
 %   
 %     T_{ijk}(x,y) = -3 r_i r_j r_k/ r^5
 %     PI_{jk}(x,y) = -2 delta_{jk}/r^3 + 6 r_j r_k/r^5      
-%
 % 
 %  Args:
 %