From 71932b4a3289831d40c46153eb9c64c30558760c Mon Sep 17 00:00:00 2001
From: coffee-addict <29309370+coffee-addict@users.noreply.github.com>
Date: Mon, 26 Feb 2024 16:12:22 -0500
Subject: [PATCH] NAG Fortran compiler (#33)

* add makefile for NAG compiler and tweak some for the compilation

* remove added files by mistake

* fixing make.incs and include path for nag compiler

---------

Co-authored-by: KosukeSugita <ks823@njit.edu>
Co-authored-by: Manas Rachh <mrachh@flatironinstitute.org>
Co-authored-by: Manas Rachh <43040789+mrachh@users.noreply.github.com>
---
 .gitignore                              |  1 +
 docs/fortran-c.rst                      |  2 +-
 make.inc.icc                            |  2 +-
 make.inc.macos.gnu                      |  6 +--
 make.inc.macos.intel                    |  8 ++-
 make.inc.macos.nag                      | 30 +++++++++++
 make.inc.windows.mingw                  |  2 +-
 makefile                                | 12 +++--
 src/Common/fmmcommon.f                  |  2 +-
 src/Common/tree_routs3d.f               |  2 +-
 src/Common/yrecursion.f                 |  9 ++--
 src/Helmholtz/hfmm3d.f                  | 14 ++---
 src/Helmholtz/hfmm3d_memest.f           |  4 +-
 src/Helmholtz/hfmm3d_mps.f90            |  5 +-
 src/Helmholtz/hfmm3dwrap_legacy.f       |  2 +-
 src/Helmholtz/hpwrouts.f                | 68 ++++++++++++++-----------
 src/Helmholtz/hwts3e.f                  |  6 +--
 src/Laplace/laprouts3d.f                |  4 +-
 src/Laplace/lfmm3d.f                    |  2 +-
 src/Laplace/lfmm3dwrap_legacy.f         |  8 +--
 src/Laplace/lpwrouts.f                  | 26 ++++++----
 src/Laplace/lwtsexp_sep2.f              |  3 +-
 test/Helmholtz/test_helmrouts3d.make    | 11 +++-
 test/Helmholtz/test_hfmm3d.make         | 10 +++-
 test/Helmholtz/test_hfmm3d_adjoint.make | 11 +++-
 test/Helmholtz/test_hfmm3d_mps.f90      |  3 +-
 test/Helmholtz/test_hfmm3d_mps.make     | 10 +++-
 test/Helmholtz/test_hfmm3d_vec.f        |  3 +-
 test/Helmholtz/test_hfmm3d_vec.make     | 10 +++-
 test/Laplace/test_lfmm3d_vec.f          |  3 +-
 30 files changed, 188 insertions(+), 91 deletions(-)
 create mode 100644 make.inc.macos.nag

diff --git a/.gitignore b/.gitignore
index 31dccfab..760e6d77 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,3 +22,4 @@ lib/*
 *.log
 *.sh
 sbatch*
+make.inc
diff --git a/docs/fortran-c.rst b/docs/fortran-c.rst
index e75587af..ecb0e6ba 100644
--- a/docs/fortran-c.rst
+++ b/docs/fortran-c.rst
@@ -355,7 +355,7 @@ and the associated pressure
 Here $x_{j}$ are the source locations,
 $\sigma_{j}$ are the Stokeslet densities,
 $\nu_{j}$ are the stresslet orientation vectors, $\mu_{j}$ 
-are the stresslet densities, and rhw xollwxrion of $x$ 
+are the stresslet densities, and the locations $x$ 
 at which the velocity and its gradient are evaluated are referred to
 as the evaluation points.
 
diff --git a/make.inc.icc b/make.inc.icc
index d09add05..4e26423e 100644
--- a/make.inc.icc
+++ b/make.inc.icc
@@ -1,7 +1,7 @@
 CC=icc
 CXX=icpc
 FC=ifort
-FFLAGS= -fPIC -O3 -march=native -funroll-loops -mkl 
+FFLAGS= -fPIC -O3 -march=native -funroll-loops -mkl -w 
 LIBS=-lm 
 CLIBS = -lm -ldl -lifcore
 
diff --git a/make.inc.macos.gnu b/make.inc.macos.gnu
index 09d29211..5051a7ac 100644
--- a/make.inc.macos.gnu
+++ b/make.inc.macos.gnu
@@ -1,6 +1,6 @@
 # makefile overrides
 # OS:       macOS
-# Compiler: gfortran X.X
+# Compiler: gfortran X.X/Clang
 # OpenMP:   enabled
 #
 
@@ -22,8 +22,6 @@ OMPLIBS = -lgomp
 # MATLAB interface:
 FDIR=$$(dirname `gfortran --print-file-name libgfortran.dylib`)
 MFLAGS +=-L${FDIR}
-MEX = $(shell ls -d /Applications/MATLAB_R20**.app)/bin/mex
-#LIBS = -lm -lstdc++.6
-#MEXLIBS= -lm -lstdc++.6 -lgfortran -ldl
+MEX = $(shell ls -d /Applications/MATLAB_R* | sort | tail -1)/bin/mex
 
 
diff --git a/make.inc.macos.intel b/make.inc.macos.intel
index b51cab39..5797c02c 100644
--- a/make.inc.macos.intel
+++ b/make.inc.macos.intel
@@ -7,7 +7,8 @@
 CC=icc
 CXX=icpc
 FC=ifort
-FFLAGS= -fPIC -O3 -march=native -funroll-loops -qmkl
+
+FFLAGS= -fPIC -O3 -march=native -funroll-loops -mkl -std=legacy -w 
 LIBS=
 #CLIBS = -lm -ldl -lifcore
 CLIBS = -lm -ldl
@@ -28,4 +29,9 @@ endif
 OMPFLAGS = -qopenmp
 OMPLIBS = -qopenmp
 
+# MATLAB interface:
+FDIR=$$(dirname `gfortran --print-file-name libgfortran.dylib`)
+MFLAGS +=-L${FDIR}
+MEX = $(shell ls -d /Applications/MATLAB_R* | sort | tail -1)/bin/mex
+
 
diff --git a/make.inc.macos.nag b/make.inc.macos.nag
new file mode 100644
index 00000000..6c07c8ff
--- /dev/null
+++ b/make.inc.macos.nag
@@ -0,0 +1,30 @@
+# make.inc for NAG Fortran compiler
+# Online documentation: https://www.nag.com/nagware/np/r71_doc/manual/compiler_2_4.html#OPTIONS
+
+FC=nagfor
+
+# The path of libraries by NAG compiler
+LIB_NAG = /usr/local/lib/NAG_Fortran
+
+# Brief descriptions of specified options below:
+# -PIC: produce position-independent code
+# -O2: optimization at a normal level
+# -Ounroll=2: the depth of loo-unrolling
+# -f90_sign: use the Fortran 77/90 version of the SIGN intrinsic instead of the Fortran 95 one 
+# -dcfuns: enable recognition of non-standard double precision complex intrinsic functions.  
+# -dusty: allows the compilation and execution of legacy software.
+# -w=x77: suppresses extension warnings for obsolete but common extensions to Fortran 77.
+# -w=unreffed: suppresses warning messages about variables set but never referenced.
+# -w=unused: suppresses warning messages about unused entities. 
+# -ieee=full: set the mode of IEEE arithmetic operation according to full mode.
+
+# Main compile command for NAG Fortran compiler
+FFLAGS = -PIC -O2 -Ounroll=1 -f90_sign -dcfuns -dusty -w=obs -w=x77 -w=unreffed -w=unused -ieee=full 
+
+# Flags overwritten in makefile
+OMPFLAGS = -openmp
+# OMPLIBS = -lf71omp64 -L$(LIB_NAG)
+OMPLIBS = -lf71omp64 -lf71rts -L$(LIB_NAG)
+LIBS = -lf71rts -L$(LIB_NAG)
+CLIBS = -lm -ldl -L$(LIB_NAG)
+FFLAGS_DYN = -PIC
diff --git a/make.inc.windows.mingw b/make.inc.windows.mingw
index d2c4e49c..0c58923b 100644
--- a/make.inc.windows.mingw
+++ b/make.inc.windows.mingw
@@ -4,7 +4,7 @@
 # OpenMP:   default enabled unless specified
 #
 
-FFLAGS= -fPIC -O3 -funroll-loops -std=legacy 
+FFLAGS= -fPIC -O3 -funroll-loops -std=legacy -w 
 
 DYNAMICLIB = $(LIBNAME).dll
 LIMPLIB = $(LIBNAME)_dll.lib
diff --git a/makefile b/makefile
index a7e35712..e5dafa06 100644
--- a/makefile
+++ b/makefile
@@ -14,7 +14,8 @@ FC=gfortran
 
 
 # set compiler flags for c and fortran
-FFLAGS= -fPIC -O3 -march=native -funroll-loops -std=legacy
+FFLAGS= -fPIC -O3 -march=native -funroll-loops -std=legacy -w
+FFLAGS_DYN= -shared -fPIC
 CFLAGS= -fPIC -O3 -march=native -funroll-loops -std=c99
 CXXFLAGS= -std=c++11 -DSCTL_PROFILE=-1 -fPIC -O3 -march=native -funroll-loops
 
@@ -86,6 +87,7 @@ endif
 # vectorized kernel directory
 SRCDIR = ./vec-kernels/src
 INCDIR = ./vec-kernels/include
+FINCDIR = ./src/Helmholtz
 LIBDIR = lib-static
 
 # objects to compile
@@ -192,10 +194,10 @@ usage:
 	$(CXX) -c $(CXXFLAGS) $< -o $@
 %.o: %.c %.h
 	$(CC) -c $(CFLAGS) $< -o $@
-%.o: %.f %.h
-	$(FC) -c $(FFLAGS) $< -o $@
+%.o: %.f 
+	$(FC) -c $(FFLAGS) -I$(FINCDIR) $< -o $@
 %.o: %.f90
-	$(FC) -c $(FFLAGS) $< -o $@
+	$(FC) -c $(FFLAGS) -I$(FINCDIR) $< -o $@
 
 # build the library...
 lib: $(STATICLIB) $(DYNAMICLIB)
@@ -224,7 +226,7 @@ $(STATICLIB): $(OBJS)
 	ar rcs $(STATICLIB) $(OBJS)
 	mv $(STATICLIB) lib-static/
 $(DYNAMICLIB): $(OBJS)
-	$(FC) -shared -fPIC $(OBJS) -o $(DYNAMICLIB) $(DYLIBS)
+	$(FC) $(FFLAGS_DYN) $(OBJS) -o $(DYNAMICLIB) $(DYLIBS)
 	mv $(DYNAMICLIB) lib/
 	[ ! -f $(LIMPLIB) ] || mv $(LIMPLIB) lib/
 
diff --git a/src/Common/fmmcommon.f b/src/Common/fmmcommon.f
index d516ad67..ccf52b5a 100644
--- a/src/Common/fmmcommon.f
+++ b/src/Common/fmmcommon.f
@@ -218,8 +218,8 @@ subroutine ireorderi(ndim,n,arr,arrsort,iarr)
 c
       subroutine drescale(n,a,r)
       implicit none
-      real *8 a(n),r
       integer i,n
+      real *8 a(n),r
 
 C$OMP PARALLEL DO DEFAULT(SHARED)
       do i=1,n
diff --git a/src/Common/tree_routs3d.f b/src/Common/tree_routs3d.f
index 36bcfa45..a6cfc849 100644
--- a/src/Common/tree_routs3d.f
+++ b/src/Common/tree_routs3d.f
@@ -1125,11 +1125,11 @@ subroutine getlist4pwdirtest(dir,censrc,centrg,boxsize)
       subroutine subdividebox(pos,npts,center,boxsize,
      1           isorted,iboxfl,subcenters)
       implicit none
+      integer npts
       double precision pos(3,npts)
       double precision center(3)
       double precision subcenters(3,8)
       double precision boxsize
-      integer npts
       integer isorted(*)
       integer iboxfl(2,8)
 
diff --git a/src/Common/yrecursion.f b/src/Common/yrecursion.f
index 1168a948..3dae2934 100644
--- a/src/Common/yrecursion.f
+++ b/src/Common/yrecursion.f
@@ -1430,7 +1430,8 @@ subroutine zylgndrbr(nmax, z, y)
 c     branch cut at (0,+i), select the lower branch 
 c     of complex square root
 c
-      if( imag(1-z*z) .gt. 0 .and. real(1-z*z) .lt. 0) u=+sqrt(1-z*z)
+c      if( imag(1-z*z) .gt. 0 .and. real(1-z*z) .lt. 0) u=+sqrt(1-z*z)
+      if( dimag(1-z*z) .gt. 0 .and. real(1-z*z) .lt. 0) u=+sqrt(1-z*z)
 ccc      call prin2('in zylgndrbr, u=*', -u, 2)
 ccc      call prin2('in zylgndrbr, 1-z^2=*', 1-z*z, 2)
 c
@@ -1498,8 +1499,10 @@ subroutine zylgndrsc(nmax, z,scale, ysc)
 c
       ztmp = 1-z*z
       u=-sqrt(ztmp)
-      if(abs(imag(z)).le.1.0d-16.and.abs(real(z)).gt.1) then
-        if(imag(u).lt.0) u = dconjg(u)
+c      if(abs(imag(z)).le.1.0d-16.and.abs(real(z)).gt.1) then
+c        if(imag(u).lt.0) u = dconjg(u)
+      if(abs(dimag(z)).le.1.0d-16.and.abs(real(z)).gt.1) then
+        if(dimag(u).lt.0) u = dconjg(u)
       endif
       ysc(0,0)=1
       do m=0, nmax
diff --git a/src/Helmholtz/hfmm3d.f b/src/Helmholtz/hfmm3d.f
index 77b547cb..ec9308ae 100644
--- a/src/Helmholtz/hfmm3d.f
+++ b/src/Helmholtz/hfmm3d.f
@@ -562,6 +562,7 @@ subroutine hfmm3dmain(nd,eps,zk,
       double complex jsort(nd,0:ntj,-ntj:ntj,nexpc)
 
 
+      integer nboxes
       integer *8 iaddr(2,nboxes), lmptot
       double precision rmlexp(lmptot)
        
@@ -575,7 +576,6 @@ subroutine hfmm3dmain(nd,eps,zk,
       integer nterms(0:nlevels)
       integer *8 ipointer(8),ltree
       integer itree(ltree)
-      integer nboxes
       double precision rscales(0:nlevels)
       double precision boxsize(0:nlevels)
       integer isrcse(2,nboxes),itargse(2,nboxes),iexpcse(2,nboxes)
@@ -752,7 +752,7 @@ subroutine hfmm3dmain(nd,eps,zk,
 
       zkiupbound = 12*pi
       zkrupbound = 16*pi
-      zi = imag(zk)
+      zi = dimag(zk)
 
       ilevcutoff = -1
 
@@ -1054,7 +1054,7 @@ subroutine hfmm3dmain(nd,eps,zk,
         allocate(iboxlexp(nd*(nterms(ilev)+1)*
      1           (2*nterms(ilev)+1),8,nthd))
         zk2 = zk*boxsize(ilev)
-        if(real(zk2).le.zkrupbound.and.imag(zk2).lt.zkiupbound.and.
+        if(real(zk2).le.zkrupbound.and.dimag(zk2).lt.zkiupbound.and.
      1        ilev.gt.ilevcutoff) then
 c             get new pw quadrature
             
@@ -1562,7 +1562,7 @@ subroutine hfmm3dmain(nd,eps,zk,
           deallocate(pgboxwexp)
 
 
-        else if((real(zk2).gt.zkrupbound.or.imag(zk2).gt.zkiupbound).
+        else if((real(zk2).gt.zkrupbound.or.dimag(zk2).gt.zkiupbound).
      1            and.ilev.gt.ilevcutoff) then
           nquad2 = nterms(ilev)*2.2
           if(ifprint.ge.1) print *, "In point and shoot regime"
@@ -1719,7 +1719,7 @@ subroutine hfmm3dmain(nd,eps,zk,
       if(ifcharge.eq.1.and.ifdipole.eq.0) then
         do ilev=1,nlevels
           zk2 = zk*boxsize(ilev)
-          if((real(zk2).gt.zkrupbound.or.imag(zk2).gt.zkiupbound).
+          if((real(zk2).gt.zkrupbound.or.dimag(zk2).gt.zkiupbound).
      1            and.ilev.gt.ilevcutoff) then
 
 C$OMP PARALLEL DO DEFAULT(SHARED)
@@ -1754,7 +1754,7 @@ subroutine hfmm3dmain(nd,eps,zk,
       if(ifcharge.eq.0.and.ifdipole.eq.1) then
         do ilev=1,nlevels
           zk2 = zk*boxsize(ilev)
-          if((real(zk2).gt.zkrupbound.or.imag(zk2).gt.zkiupbound).
+          if((real(zk2).gt.zkrupbound.or.dimag(zk2).gt.zkiupbound).
      1            and.ilev.gt.ilevcutoff) then
 
 C$OMP PARALLEL DO DEFAULT(SHARED)
@@ -1789,7 +1789,7 @@ subroutine hfmm3dmain(nd,eps,zk,
       if(ifcharge.eq.1.and.ifdipole.eq.1) then
         do ilev=1,nlevels
           zk2 = zk*boxsize(ilev)
-          if((real(zk2).gt.zkrupbound.or.imag(zk2).gt.zkiupbound).
+          if((real(zk2).gt.zkrupbound.or.dimag(zk2).gt.zkiupbound).
      1            and.ilev.gt.ilevcutoff) then
 
 C$OMP PARALLEL DO DEFAULT(SHARED)
diff --git a/src/Helmholtz/hfmm3d_memest.f b/src/Helmholtz/hfmm3d_memest.f
index 40d95fb2..8ea7ba34 100644
--- a/src/Helmholtz/hfmm3d_memest.f
+++ b/src/Helmholtz/hfmm3d_memest.f
@@ -377,7 +377,7 @@ subroutine hfmm3d_memest(nd,eps,zk,nsource,source,ifcharge,
 
       zkiupbound = 12*pi
       zkrupbound = 16*pi
-      zi = imag(zkfmm)
+      zi = dimag(zkfmm)
 
       ilevcutoff = -1
 
@@ -391,7 +391,7 @@ subroutine hfmm3d_memest(nd,eps,zk,nsource,source,ifcharge,
 
       do ilev=2,nlevels
         zk2 = zkfmm*boxsize(ilev)
-        if(real(zk2).le.zkrupbound.and.imag(zk2).lt.zkiupbound.and.
+        if(real(zk2).le.zkrupbound.and.dimag(zk2).lt.zkiupbound.and.
      1     ilev.gt.ilevcutoff) then
 
           ier = 0
diff --git a/src/Helmholtz/hfmm3d_mps.f90 b/src/Helmholtz/hfmm3d_mps.f90
index 9946c6ee..f19847c2 100644
--- a/src/Helmholtz/hfmm3d_mps.f90
+++ b/src/Helmholtz/hfmm3d_mps.f90
@@ -393,7 +393,7 @@ subroutine hfmm3dmain_mps(nd, eps, zk, &
   integer :: impolesort(nmpole)
 
   ! storage stuff for tree and multipole expansions
-  integer :: lmptemp
+  integer :: lmptemp,nboxes
   integer *8 :: iaddr(2,nboxes), lmptot
   double precision :: rmlexp(lmptot)
   double precision :: mptemp(lmptemp)
@@ -406,7 +406,6 @@ subroutine hfmm3dmain_mps(nd, eps, zk, &
   integer :: nterms(0:nlevels)
   integer *8 :: ipointer(8)
   integer :: itree(ltree)
-  integer :: nboxes
   integer :: mnbors,mnlist1, mnlist2,mnlist3,mnlist4
   integer :: isrcse(2,nmpole)
   integer, allocatable :: nlist1(:),list1(:,:)
@@ -819,7 +818,7 @@ subroutine hfmm3dmain_mps(nd, eps, zk, &
 
       ! load the necessary quadrature for plane waves
       zk2 = zk*boxsize(ilev)
-      if ( (real(zk2).le.16*pi) .and. (imag(zk2).le.12*pi) &
+      if ( (real(zk2).le.16*pi) .and. (dimag(zk2).le.12*pi) &
           .and. (ifmp .eq. 0) ) then
         ier = 0
 
diff --git a/src/Helmholtz/hfmm3dwrap_legacy.f b/src/Helmholtz/hfmm3dwrap_legacy.f
index a296fa48..87c7233c 100644
--- a/src/Helmholtz/hfmm3dwrap_legacy.f
+++ b/src/Helmholtz/hfmm3dwrap_legacy.f
@@ -195,12 +195,12 @@ subroutine hfmm3dparttarg(ier,iprec,zk,nsource,source,
       double complex charge(nsource),dipstr(nsource)
       double precision dipvec(3,nsource)
 
+      integer ntarg
       integer ifpot,iffld,ifpottarg,iffldtarg
       double complex  pot(nsource),fld(3,nsource)
       double complex  pottarg(ntarg),fldtarg(3,ntarg)
 
       integer nd,ifpgh,ifpghtarg
-      integer ntarg
       double precision targ(3,ntarg)
       double complex, allocatable :: dipvec_in(:,:)
       double complex, allocatable :: pottmp(:),gradtmp(:,:)
diff --git a/src/Helmholtz/hpwrouts.f b/src/Helmholtz/hpwrouts.f
index 93d17864..cd5ec7fc 100644
--- a/src/Helmholtz/hpwrouts.f
+++ b/src/Helmholtz/hpwrouts.f
@@ -145,33 +145,33 @@ subroutine hmkexps(rlams,nlambs,numphys,nexptotp,zk,xs,ys,zs)
          do 200 mth = 1,numphys(nl)
             u = (mth-1)*hu
             ncurrent = ntot+mth
-            zs(1,ncurrent) = cdexp(-rlams(nl) )
-            zs(2,ncurrent) = cdexp(-2.0d0*rlams(nl) )
-            zs(3,ncurrent) = cdexp(-3.0d0*rlams(nl) )
-            zs(4,ncurrent) = cdexp(-4.0d0*rlams(nl) )
-            zs(5,ncurrent) = cdexp(-5.0d0*rlams(nl) )
-            xs(-1,ncurrent) = cdexp(-ima*rk*cos(u))
-            xs(-2,ncurrent) = cdexp(-ima*rk*2.0d0*cos(u))
-            xs(-3,ncurrent) = cdexp(-ima*rk*3.0d0*cos(u))
-            xs(-4,ncurrent) = cdexp(-ima*rk*4.0d0*cos(u))
-            xs(-5,ncurrent) = cdexp(-ima*rk*5.0d0*cos(u))
+            zs(1,ncurrent) = exp(-rlams(nl) )
+            zs(2,ncurrent) = exp(-2.0d0*rlams(nl) )
+            zs(3,ncurrent) = exp(-3.0d0*rlams(nl) )
+            zs(4,ncurrent) = exp(-4.0d0*rlams(nl) )
+            zs(5,ncurrent) = exp(-5.0d0*rlams(nl) )
+            xs(-1,ncurrent) = exp(-ima*rk*cos(u))
+            xs(-2,ncurrent) = exp(-ima*rk*2.0d0*cos(u))
+            xs(-3,ncurrent) = exp(-ima*rk*3.0d0*cos(u))
+            xs(-4,ncurrent) = exp(-ima*rk*4.0d0*cos(u))
+            xs(-5,ncurrent) = exp(-ima*rk*5.0d0*cos(u))
             xs(0,ncurrent) = 1
-            xs(1,ncurrent) = cdexp(ima*rk*cos(u))
-            xs(2,ncurrent) = cdexp(ima*rk*2.0d0*cos(u))
-            xs(3,ncurrent) = cdexp(ima*rk*3.0d0*cos(u))
-            xs(4,ncurrent) = cdexp(ima*rk*4.0d0*cos(u))
-            xs(5,ncurrent) = cdexp(ima*rk*5.0d0*cos(u))
-            ys(-1,ncurrent) = cdexp(-ima*rk*dsin(u))
-            ys(-2,ncurrent) = cdexp(-ima*rk*2.0d0*dsin(u))
-            ys(-3,ncurrent) = cdexp(-ima*rk*3.0d0*dsin(u))
-            ys(-4,ncurrent) = cdexp(-ima*rk*4.0d0*dsin(u))
-            ys(-5,ncurrent) = cdexp(-ima*rk*5.0d0*dsin(u))
+            xs(1,ncurrent) = exp(ima*rk*cos(u))
+            xs(2,ncurrent) = exp(ima*rk*2.0d0*cos(u))
+            xs(3,ncurrent) = exp(ima*rk*3.0d0*cos(u))
+            xs(4,ncurrent) = exp(ima*rk*4.0d0*cos(u))
+            xs(5,ncurrent) = exp(ima*rk*5.0d0*cos(u))
+            ys(-1,ncurrent) = exp(-ima*rk*dsin(u))
+            ys(-2,ncurrent) = exp(-ima*rk*2.0d0*dsin(u))
+            ys(-3,ncurrent) = exp(-ima*rk*3.0d0*dsin(u))
+            ys(-4,ncurrent) = exp(-ima*rk*4.0d0*dsin(u))
+            ys(-5,ncurrent) = exp(-ima*rk*5.0d0*dsin(u))
             ys(0,ncurrent) = 1
-            ys(1,ncurrent) = cdexp(ima*rk*dsin(u))
-            ys(2,ncurrent) = cdexp(ima*rk*2.0d0*dsin(u))
-            ys(3,ncurrent) = cdexp(ima*rk*3.0d0*dsin(u))
-            ys(4,ncurrent) = cdexp(ima*rk*4.0d0*dsin(u))
-            ys(5,ncurrent) = cdexp(ima*rk*5.0d0*dsin(u))
+            ys(1,ncurrent) = exp(ima*rk*dsin(u))
+            ys(2,ncurrent) = exp(ima*rk*2.0d0*dsin(u))
+            ys(3,ncurrent) = exp(ima*rk*3.0d0*dsin(u))
+            ys(4,ncurrent) = exp(ima*rk*4.0d0*dsin(u))
+            ys(5,ncurrent) = exp(ima*rk*5.0d0*dsin(u))
 200      continue
          ntot = ntot+numphys(nl)
 400   continue
@@ -2599,10 +2599,11 @@ subroutine hpw_ud_eval_p(nd,zk2,center,boxsize,ntarg,targ,nlam,
      1   whts,nphys,nexptotp,nphmax,mexpupphys,mexpdownphys,pot)
       implicit none
       integer nd
+      integer ntarg,nlam
       real *8 center(3),boxsize,targ(3,ntarg)
       complex *16 rlams(nlam),pot(nd,ntarg)
       complex *16 whts(nlam),zk2
-      integer ntarg,nlam,nphys(nlam),nexptotp,nphmax
+      integer nphys(nlam),nexptotp,nphmax
       complex *16 mexpupphys(nd,nexptotp),mexpdownphys(nd,nexptotp)
       complex *16 ima
       complex *16, allocatable :: cc(:),cc2(:)
@@ -2672,10 +2673,11 @@ subroutine hpw_ns_eval_p(nd,zk2,center,boxsize,ntarg,targ,nlam,
      1   rlams,whts,nphys,nexptotp,nphmax,mexpupphys,mexpdownphys,pot)
       implicit none
       integer nd
+      integer ntarg,nlam
       real *8 center(3),boxsize,targ(3,ntarg)
       complex *16 rlams(nlam),pot(nd,ntarg)
       complex *16 whts(nlam),zk2
-      integer ntarg,nlam,nphys(nlam),nexptotp,nphmax
+      integer nphys(nlam),nexptotp,nphmax
       complex *16 mexpupphys(nd,nexptotp),mexpdownphys(nd,nexptotp)
       complex *16 ima
       complex *16, allocatable :: cc(:),cc2(:)
@@ -2745,10 +2747,11 @@ subroutine hpw_ew_eval_p(nd,zk2,center,boxsize,ntarg,targ,nlam,
      1   rlams,whts,nphys,nexptotp,nphmax,mexpupphys,mexpdownphys,pot)
       implicit none
       integer nd
+      integer ntarg,nlam
       real *8 center(3),boxsize,targ(3,ntarg)
       complex *16 rlams(nlam),pot(nd,ntarg)
       complex *16 whts(nlam),zk2
-      integer ntarg,nlam,nphys(nlam),nexptotp,nphmax
+      integer nphys(nlam),nexptotp,nphmax
       complex *16 mexpupphys(nd,nexptotp),mexpdownphys(nd,nexptotp)
       complex *16 ima
       complex *16, allocatable :: cc(:),cc2(:)
@@ -2819,10 +2822,11 @@ subroutine hpw_ud_eval_g(nd,zk2,center,boxsize,ntarg,targ,nlam,
      2   grad)
       implicit none
       integer nd
+      integer ntarg,nlam
       real *8 center(3),boxsize,targ(3,ntarg)
       complex *16 rlams(nlam),pot(nd,ntarg)
       complex *16 grad(nd,3,ntarg),whts(nlam),zk2
-      integer ntarg,nlam,nphys(nlam),nexptotp,nphmax
+      integer nphys(nlam),nexptotp,nphmax
       complex *16 mexpupphys(nd,nexptotp),mexpdownphys(nd,nexptotp)
       complex *16 ima
       complex *16, allocatable :: cc(:),crc(:),crs(:),cc2(:)
@@ -2903,10 +2907,11 @@ subroutine hpw_ns_eval_g(nd,zk2,center,boxsize,ntarg,targ,nlam,
      2  grad)
       implicit none
       integer nd
+      integer ntarg,nlam
       real *8 center(3),boxsize,targ(3,ntarg)
       complex *16 rlams(nlam),pot(nd,ntarg)
       complex *16 grad(nd,3,ntarg),whts(nlam),zk2
-      integer ntarg,nlam,nphys(nlam),nexptotp,nphmax
+      integer nphys(nlam),nexptotp,nphmax
       complex *16 mexpupphys(nd,nexptotp),mexpdownphys(nd,nexptotp)
       complex *16 ima
       complex *16, allocatable :: cc(:),crc(:),crs(:),cc2(:)
@@ -2985,10 +2990,11 @@ subroutine hpw_ew_eval_g(nd,zk2,center,boxsize,ntarg,targ,nlam,
      2  grad)
       implicit none
       integer nd
+      integer ntarg,nlam
       real *8 center(3),boxsize,targ(3,ntarg)
       complex *16 rlams(nlam),pot(nd,ntarg)
       complex *16 grad(nd,3,ntarg),whts(nlam),zk2
-      integer ntarg,nlam,nphys(nlam),nexptotp,nphmax
+      integer nphys(nlam),nexptotp,nphmax
       complex *16 mexpupphys(nd,nexptotp),mexpdownphys(nd,nexptotp)
       complex *16 ima
       complex *16, allocatable :: cc(:),crc(:),crs(:),cc2(:)
diff --git a/src/Helmholtz/hwts3e.f b/src/Helmholtz/hwts3e.f
index 1b9de37f..7e45f173 100644
--- a/src/Helmholtz/hwts3e.f
+++ b/src/Helmholtz/hwts3e.f
@@ -148,8 +148,8 @@ subroutine hwts3e(ier,eps,rk,cxs,cws,n)
 c
         gamma=0
         v=u/(1+gamma*abs(dble(-ima*rk)))
-        cxs(k)=u-ima*rk+abs(imag(-ima*rk))*ima*(v/(1+v))
-        cws(k)=1+abs(imag(-ima*rk))*ima*((1+v)-v)/(1+v)**2
+        cxs(k)=u-ima*rk+abs(dimag(-ima*rk))*ima*(v/(1+v))
+        cws(k)=1+abs(dimag(-ima*rk))*ima*((1+v)-v)/(1+v)**2
      $     /(1+gamma*abs(dble(-ima*rk)))
         cws(k)=cws(k)*ws(k)*uweight
         endif
@@ -275,7 +275,7 @@ subroutine hwts3dgetd(ier,rk,idomain)
         cy(24)=16*pi
 c
         rkrea=dble(rk)
-        rkima=imag(rk)
+        rkima=dimag(rk)
 c
         idomain = 0
         ier = 0
diff --git a/src/Laplace/laprouts3d.f b/src/Laplace/laprouts3d.f
index 93a8c4ae..68afedbe 100644
--- a/src/Laplace/laprouts3d.f
+++ b/src/Laplace/laprouts3d.f
@@ -406,7 +406,7 @@ subroutine l3dmpevalg(nd,rscale,center,mpole,nterms,
               pot(idim,itarg)=pot(idim,itarg)+rtmp1*rtmp2
               ur(idim) = ur(idim) + rtmp4*rtmp2
               utheta(idim) = utheta(idim)+rtmp5*rtmp2
-              rtmp2 = 2*imag(mpole(idim,n,m)*ephi(m))
+              rtmp2 = 2*dimag(mpole(idim,n,m)*ephi(m))
               uphi(idim) = uphi(idim) + rtmp6*rtmp2
             enddo
           enddo
@@ -1272,7 +1272,7 @@ subroutine l3dtaevalg(nd,rscale,center,mpole,nterms,
               pot(idim,itarg)=pot(idim,itarg)+rtmp1*rtmp2
               ur(idim) = ur(idim) + rtmp4*rtmp2
               utheta(idim) = utheta(idim)+rtmp5*rtmp2
-              rtmp2 = 2*imag(mpole(idim,n,m)*ephi(m))
+              rtmp2 = 2*dimag(mpole(idim,n,m)*ephi(m))
               uphi(idim) = uphi(idim) + rtmp6*rtmp2
             enddo
           enddo
diff --git a/src/Laplace/lfmm3d.f b/src/Laplace/lfmm3d.f
index f8437ae6..2e26cca8 100644
--- a/src/Laplace/lfmm3d.f
+++ b/src/Laplace/lfmm3d.f
@@ -572,6 +572,7 @@ subroutine lfmm3dmain(nd,eps,
       double complex tsort(nd,0:ntj,-ntj:ntj,nexpc)
       double precision scjsort(nexpc)
 
+      integer nboxes
       integer *8 iaddr(2,nboxes), lmptot
       integer lmptemp
       double precision rmlexp(lmptot)
@@ -588,7 +589,6 @@ subroutine lfmm3dmain(nd,eps,
       integer nterms(0:nlevels)
       integer *8 ipointer(8),ltree
       integer itree(ltree)
-      integer nboxes
       double precision rscales(0:nlevels)
       double precision boxsize(0:nlevels)
       integer isrcse(2,nboxes),itargse(2,nboxes),iexpcse(2,nboxes)
diff --git a/src/Laplace/lfmm3dwrap_legacy.f b/src/Laplace/lfmm3dwrap_legacy.f
index 0946dcf0..5207a887 100644
--- a/src/Laplace/lfmm3dwrap_legacy.f
+++ b/src/Laplace/lfmm3dwrap_legacy.f
@@ -427,7 +427,7 @@ subroutine l3dpartdirect(nsource,
 C$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i)
         do i=1,ns
           charge_in(1,i) = real(charge(i))
-          charge_in(2,i) = imag(charge(i))
+          charge_in(2,i) = dimag(charge(i))
         enddo
 C$OMP END PARALLEL DO
         if(ifdipole.ne.1) allocate(dipvec_in(2,3,1))
@@ -439,11 +439,11 @@ subroutine l3dpartdirect(nsource,
 C$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i)
         do i=1,ns
           dipvec_in(1,1,i) = real(dipstr(i))*dipvec(1,i)
-          dipvec_in(2,1,i) = imag(dipstr(i))*dipvec(1,i)
+          dipvec_in(2,1,i) = dimag(dipstr(i))*dipvec(1,i)
           dipvec_in(1,2,i) = real(dipstr(i))*dipvec(2,i)
-          dipvec_in(2,2,i) = imag(dipstr(i))*dipvec(2,i)
+          dipvec_in(2,2,i) = dimag(dipstr(i))*dipvec(2,i)
           dipvec_in(1,3,i) = real(dipstr(i))*dipvec(3,i)
-          dipvec_in(2,3,i) = imag(dipstr(i))*dipvec(3,i)
+          dipvec_in(2,3,i) = dimag(dipstr(i))*dipvec(3,i)
         enddo
 C$OMP END PARALLEL DO
         if(ifcharge.ne.1) allocate(charge_in(2,1))
diff --git a/src/Laplace/lpwrouts.f b/src/Laplace/lpwrouts.f
index ce9ae266..aa1a7c86 100644
--- a/src/Laplace/lpwrouts.f
+++ b/src/Laplace/lpwrouts.f
@@ -153,11 +153,11 @@ subroutine mkfexp(nlambs,numfour,numphys,fexpe,fexpo,fexpback)
         do j=1,nalpha
           alpha=(j-1)*halpha
 	      do mm = 2,numfour(i),2
-            fexpe(nexte)  = cdexp(ima*(mm-1)*alpha)
+            fexpe(nexte)  = exp(ima*(mm-1)*alpha)
 	        nexte = nexte + 1
           enddo
 	      do mm = 3,numfour(i),2
-            fexpo(nexto)  = cdexp(ima*(mm-1)*alpha)
+            fexpo(nexto)  = exp(ima*(mm-1)*alpha)
 	        nexto = nexto + 1
           enddo
         enddo
@@ -170,7 +170,7 @@ subroutine mkfexp(nlambs,numfour,numphys,fexpe,fexpo,fexpback)
 	    do mm = 2,numfour(i)
           do j=1,nalpha
             alpha=(j-1)*halpha
-            fexpback(next)  = cdexp(-ima*(mm-1)*alpha)
+            fexpback(next)  = exp(-ima*(mm-1)*alpha)
 	        next = next + 1
           enddo
         enddo
@@ -637,7 +637,7 @@ subroutine ftophys(nd,mexpf,nlambs,rlams,numfour,numphys,
           enddo
           do mm = 2,numfour(i),2
             do idim=1,nd
-              rtmp = 2*imag(fexpe(nexte)*mexpf(idim,nftot+mm))
+              rtmp = 2*dimag(fexpe(nexte)*mexpf(idim,nftot+mm))
               mexpphys(idim,nptot+ival) = mexpphys(idim,nptot+ival) +
      1                dcmplx(0.0d0,rtmp)
             enddo
@@ -2484,9 +2484,10 @@ subroutine lpw_ud_eval_p(nd,center,boxsize,ntarg,targ,nlam,rlams,
      1   whts,nphys,nexptotp,nphmax,mexpupphys,mexpdownphys,pot)
       implicit none
       integer nd
+      integer ntarg,nlam
       real *8 center(3),boxsize,targ(3,ntarg),rlams(nlam),pot(nd,ntarg)
       real *8 whts(nlam)
-      integer ntarg,nlam,nphys(nlam),nexptotp,nphmax
+      integer nphys(nlam),nexptotp,nphmax
       complex *16 mexpupphys(nd,nexptotp),mexpdownphys(nd,nexptotp)
       complex *16 ima
       complex *16, allocatable :: cc(:)
@@ -2551,9 +2552,10 @@ subroutine lpw_ns_eval_p(nd,center,boxsize,ntarg,targ,nlam,rlams,
      1   whts,nphys,nexptotp,nphmax,mexpupphys,mexpdownphys,pot)
       implicit none
       integer nd
+      integer ntarg,nlam
       real *8 center(3),boxsize,targ(3,ntarg),rlams(nlam),pot(nd,ntarg)
       real *8 whts(nlam)
-      integer ntarg,nlam,nphys(nlam),nexptotp,nphmax
+      integer nphys(nlam),nexptotp,nphmax
       complex *16 mexpupphys(nd,nexptotp),mexpdownphys(nd,nexptotp)
       complex *16 ima
       complex *16, allocatable :: cc(:)
@@ -2616,9 +2618,10 @@ subroutine lpw_ew_eval_p(nd,center,boxsize,ntarg,targ,nlam,rlams,
      1   whts,nphys,nexptotp,nphmax,mexpupphys,mexpdownphys,pot)
       implicit none
       integer nd
+      integer ntarg,nlam
       real *8 center(3),boxsize,targ(3,ntarg),rlams(nlam),pot(nd,ntarg)
       real *8 whts(nlam)
-      integer ntarg,nlam,nphys(nlam),nexptotp,nphmax
+      integer nphys(nlam),nexptotp,nphmax
       complex *16 mexpupphys(nd,nexptotp),mexpdownphys(nd,nexptotp)
       complex *16 ima
       complex *16, allocatable :: cc(:)
@@ -2681,10 +2684,11 @@ subroutine lpw_ud_eval_g(nd,center,boxsize,ntarg,targ,nlam,rlams,
      1   whts,nphys,nexptotp,nphmax,mexpupphys,mexpdownphys,pot,grad)
       implicit none
       integer nd
+      integer ntarg,nlam
       real *8 center(3),boxsize,targ(3,ntarg),rlams(nlam),pot(nd,ntarg)
       real *8 grad(nd,3,ntarg)
       real *8 whts(nlam)
-      integer ntarg,nlam,nphys(nlam),nexptotp,nphmax
+      integer nphys(nlam),nexptotp,nphmax
       complex *16 mexpupphys(nd,nexptotp),mexpdownphys(nd,nexptotp)
       complex *16 ima
       complex *16, allocatable :: cc(:),crc(:),crs(:)
@@ -2762,10 +2766,11 @@ subroutine lpw_ns_eval_g(nd,center,boxsize,ntarg,targ,nlam,rlams,
      1   whts,nphys,nexptotp,nphmax,mexpupphys,mexpdownphys,pot,grad)
       implicit none
       integer nd
+      integer ntarg,nlam
       real *8 center(3),boxsize,targ(3,ntarg),rlams(nlam),pot(nd,ntarg)
       real *8 grad(nd,3,ntarg)
       real *8 whts(nlam)
-      integer ntarg,nlam,nphys(nlam),nexptotp,nphmax
+      integer nphys(nlam),nexptotp,nphmax
       complex *16 mexpupphys(nd,nexptotp),mexpdownphys(nd,nexptotp)
       complex *16 ima
       complex *16, allocatable :: cc(:),crc(:),crs(:)
@@ -2841,10 +2846,11 @@ subroutine lpw_ew_eval_g(nd,center,boxsize,ntarg,targ,nlam,rlams,
      1   whts,nphys,nexptotp,nphmax,mexpupphys,mexpdownphys,pot,grad)
       implicit none
       integer nd
+      integer ntarg,nlam
       real *8 center(3),boxsize,targ(3,ntarg),rlams(nlam),pot(nd,ntarg)
       real *8 grad(nd,3,ntarg)
       real *8 whts(nlam)
-      integer ntarg,nlam,nphys(nlam),nexptotp,nphmax
+      integer nphys(nlam),nexptotp,nphmax
       complex *16 mexpupphys(nd,nexptotp),mexpdownphys(nd,nexptotp)
       complex *16 ima
       complex *16, allocatable :: cc(:),crc(:),crs(:)
diff --git a/src/Laplace/lwtsexp_sep2.f b/src/Laplace/lwtsexp_sep2.f
index 69d81e8f..5c0c8fe9 100644
--- a/src/Laplace/lwtsexp_sep2.f
+++ b/src/Laplace/lwtsexp_sep2.f
@@ -4530,7 +4530,8 @@ subroutine lwtsexp3sep2(n,xs,ws,err)
 c
 	subroutine numthetasix(numtets,nlams)
         implicit none
-	integer numtets(nlams),nlams 
+	integer nlams 
+	integer numtets(nlams)
 c
 c       This routine returns the number of Fourier modes needed in the
 c       phi integral for each of the discrete lambda values given
diff --git a/test/Helmholtz/test_helmrouts3d.make b/test/Helmholtz/test_helmrouts3d.make
index a6d5799f..9acf9cf3 100644
--- a/test/Helmholtz/test_helmrouts3d.make
+++ b/test/Helmholtz/test_helmrouts3d.make
@@ -1,5 +1,6 @@
 #HOST = gcc
-HOST = gcc-openmp
+#HOST = gcc-openmp
+HOST = nag
 
 PROJECT = int2-helmrouts3d
 
@@ -16,6 +17,14 @@ ifeq ($(HOST),gcc-openmp)
     FFLAGS=-fPIC -O3 -funroll-loops -march=native -fopenmp -std=legacy
 endif
 
+ifeq ($(HOST),nag)
+    FC=nagfor
+    FFLAGS=-PIC -O3 -Ounroll=1 -f90_sign -dcfuns -dusty -w=x77 -w=unreffed -w=unused -ieee=full -openmp
+    OMPFLAGS= -openmp
+    OMPLIBS = -lf71omp64 
+    CLIBS = -lm -ldl
+endif
+
 # Test objects
 #
 COM = ../../src/Common
diff --git a/test/Helmholtz/test_hfmm3d.make b/test/Helmholtz/test_hfmm3d.make
index e0adbc61..af2ea354 100644
--- a/test/Helmholtz/test_hfmm3d.make
+++ b/test/Helmholtz/test_hfmm3d.make
@@ -1,5 +1,6 @@
 #HOST = gcc
-HOST = gcc-openmp
+#HOST = gcc-openmp
+HOST = nag
 
 PROJECT = int2-hfmm3d
 
@@ -16,6 +17,13 @@ ifeq ($(HOST),gcc-openmp)
     FFLAGS=-fPIC -O3 -funroll-loops -march=native -fopenmp -std=legacy
 endif
 
+ifeq ($(HOST),nag)
+    FC=nagfor
+    FFLAGS=-PIC -O3 -Ounroll=1 -f90_sign -dcfuns -dusty -w=x77 -w=unreffed -w=unused -ieee=full -openmp
+    OMPFLAGS= -openmp
+    OMPLIBS = -lf71omp64 
+    CLIBS = -lm -ldl
+endif
 
 # Test objects
 #
diff --git a/test/Helmholtz/test_hfmm3d_adjoint.make b/test/Helmholtz/test_hfmm3d_adjoint.make
index 7bad9d3a..76661492 100644
--- a/test/Helmholtz/test_hfmm3d_adjoint.make
+++ b/test/Helmholtz/test_hfmm3d_adjoint.make
@@ -1,5 +1,6 @@
 #HOST = gcc
-HOST = gcc-openmp
+#HOST = gcc-openmp
+HOST = nag
 
 PROJECT = int2-hfmm3d-mps
 
@@ -16,6 +17,14 @@ ifeq ($(HOST),gcc-openmp)
     FFLAGS=-fPIC -O3 -funroll-loops -march=native -fopenmp -std=legacy
 endif
 
+ifeq ($(HOST),nag)
+    FC=nagfor
+#    FFLAGS=-PIC -O3 -Ounroll=1 -f90_sign -dcfuns -dusty -w=x77 -w=unreffed -w=unused -ieee=full
+    FFLAGS=-PIC -O3 -Ounroll=1 -f90_sign -dcfuns -dusty -w=x77 -w=unreffed -w=unused -ieee=full -openmp
+    OMPFLAGS= -openmp
+    OMPLIBS = -lf71omp64 
+    CLIBS = -lm -ldl
+endif
 
 # Test objects
 #
diff --git a/test/Helmholtz/test_hfmm3d_mps.f90 b/test/Helmholtz/test_hfmm3d_mps.f90
index 32b4fd17..93e65725 100644
--- a/test/Helmholtz/test_hfmm3d_mps.f90
+++ b/test/Helmholtz/test_hfmm3d_mps.f90
@@ -303,6 +303,7 @@ subroutine comperr_vec(nd,zk,ns,source,ifcharge,charge,ifdipole, &
   double complex zk
   integer ns,nt,ifcharge,ifdipole,ifpgh,ifpghtarg
 
+  integer nd
   double precision source(3,*),targ(3,*)
   double complex dipvec(nd,3,*)
   double complex charge(nd,*)
@@ -310,7 +311,7 @@ subroutine comperr_vec(nd,zk,ns,source,ifcharge,charge,ifdipole, &
   double complex pot(nd,*),pottarg(nd,*),grad(nd,3,*), &
       gradtarg(nd,3,*)
 
-  integer i,j,ntest,nd,idim
+  integer i,j,ntest,idim
 
   double precision err,ra
 
diff --git a/test/Helmholtz/test_hfmm3d_mps.make b/test/Helmholtz/test_hfmm3d_mps.make
index 5d21c4db..c201b9ad 100644
--- a/test/Helmholtz/test_hfmm3d_mps.make
+++ b/test/Helmholtz/test_hfmm3d_mps.make
@@ -1,5 +1,6 @@
 #HOST = gcc
-HOST = gcc-openmp
+#HOST = gcc-openmp
+HOST = nag
 
 PROJECT = int2-hfmm3d-mps
 
@@ -16,6 +17,13 @@ ifeq ($(HOST),gcc-openmp)
     FFLAGS=-fPIC -O3 -march=native -fopenmp -std=legacy
 endif
 
+ifeq ($(HOST),nag)
+    FC=nagfor
+    FFLAGS=-PIC -O3 -Ounroll=1 -f90_sign -dcfuns -dusty -w=x77 -w=unreffed -w=unused -ieee=full -openmp
+    OMPFLAGS= -openmp
+    OMPLIBS = -lf71omp64 
+    CLIBS = -lm -ldl
+endif
 
 # Test objects
 #
diff --git a/test/Helmholtz/test_hfmm3d_vec.f b/test/Helmholtz/test_hfmm3d_vec.f
index 6c50a4d4..e6485004 100644
--- a/test/Helmholtz/test_hfmm3d_vec.f
+++ b/test/Helmholtz/test_hfmm3d_vec.f
@@ -701,6 +701,7 @@ subroutine comperr_vec(nd,zk,ns,source,ifcharge,charge,ifdipole,
       double complex zk
       integer ns,nt,ifcharge,ifdipole,ifpgh,ifpghtarg
       
+      integer nd
       double precision source(3,*),targ(3,*)
       double complex dipvec(nd,3,*)
       double complex charge(nd,*)
@@ -708,7 +709,7 @@ subroutine comperr_vec(nd,zk,ns,source,ifcharge,charge,ifdipole,
       double complex pot(nd,*),pottarg(nd,*),grad(nd,3,*),
      1    gradtarg(nd,3,*)
 
-      integer i,j,ntest,nd,idim
+      integer i,j,ntest,idim
 
       double precision err,ra
       
diff --git a/test/Helmholtz/test_hfmm3d_vec.make b/test/Helmholtz/test_hfmm3d_vec.make
index 3fd1d7f6..6620ae62 100644
--- a/test/Helmholtz/test_hfmm3d_vec.make
+++ b/test/Helmholtz/test_hfmm3d_vec.make
@@ -1,5 +1,6 @@
 #HOST = gcc
-HOST = gcc-openmp
+#HOST = gcc-openmp
+HOST = nag
 
 PROJECT = int2-hfmm3d-vec
 
@@ -16,6 +17,13 @@ ifeq ($(HOST),gcc-openmp)
     FFLAGS=-fPIC -O3 -funroll-loops -march=native -fopenmp -std=legacy
 endif
 
+ifeq ($(HOST),nag)
+    FC=nagfor
+    FFLAGS=-PIC -O3 -Ounroll=1 -f90_sign -dcfuns -dusty -w=x77 -w=unreffed -w=unused -ieee=full -openmp
+    OMPFLAGS= -openmp
+    OMPLIBS = -lf71omp64 
+    CLIBS = -lm -ldl
+endif
 
 # Test objects
 #
diff --git a/test/Laplace/test_lfmm3d_vec.f b/test/Laplace/test_lfmm3d_vec.f
index 22089db1..f9fbdd3d 100644
--- a/test/Laplace/test_lfmm3d_vec.f
+++ b/test/Laplace/test_lfmm3d_vec.f
@@ -989,6 +989,7 @@ subroutine comperr_vec(nd,ns,source,ifcharge,charge,ifdipole,
       double complex zk
       integer ns,nt,ifcharge,ifdipole,ifpgh,ifpghtarg
       
+      integer nd
       double precision source(3,*),targ(3,*)
       double precision dipvec(nd,3,*)
       double precision charge(nd,*)
@@ -997,7 +998,7 @@ subroutine comperr_vec(nd,ns,source,ifcharge,charge,ifdipole,
       double precision gradtarg(nd,3,*)
       double precision hess(nd,6,*),hesstarg(nd,6,*)
 
-      integer i,j,ntest,nd,l,idim
+      integer i,j,ntest,l,idim
 
       double precision err,ra