From 71932b4a3289831d40c46153eb9c64c30558760c Mon Sep 17 00:00:00 2001 From: coffee-addict <29309370+coffee-addict@users.noreply.github.com> Date: Mon, 26 Feb 2024 16:12:22 -0500 Subject: [PATCH] NAG Fortran compiler (#33) * add makefile for NAG compiler and tweak some for the compilation * remove added files by mistake * fixing make.incs and include path for nag compiler --------- Co-authored-by: KosukeSugita Co-authored-by: Manas Rachh Co-authored-by: Manas Rachh <43040789+mrachh@users.noreply.github.com> --- .gitignore | 1 + docs/fortran-c.rst | 2 +- make.inc.icc | 2 +- make.inc.macos.gnu | 6 +-- make.inc.macos.intel | 8 ++- make.inc.macos.nag | 30 +++++++++++ make.inc.windows.mingw | 2 +- makefile | 12 +++-- src/Common/fmmcommon.f | 2 +- src/Common/tree_routs3d.f | 2 +- src/Common/yrecursion.f | 9 ++-- src/Helmholtz/hfmm3d.f | 14 ++--- src/Helmholtz/hfmm3d_memest.f | 4 +- src/Helmholtz/hfmm3d_mps.f90 | 5 +- src/Helmholtz/hfmm3dwrap_legacy.f | 2 +- src/Helmholtz/hpwrouts.f | 68 ++++++++++++++----------- src/Helmholtz/hwts3e.f | 6 +-- src/Laplace/laprouts3d.f | 4 +- src/Laplace/lfmm3d.f | 2 +- src/Laplace/lfmm3dwrap_legacy.f | 8 +-- src/Laplace/lpwrouts.f | 26 ++++++---- src/Laplace/lwtsexp_sep2.f | 3 +- test/Helmholtz/test_helmrouts3d.make | 11 +++- test/Helmholtz/test_hfmm3d.make | 10 +++- test/Helmholtz/test_hfmm3d_adjoint.make | 11 +++- test/Helmholtz/test_hfmm3d_mps.f90 | 3 +- test/Helmholtz/test_hfmm3d_mps.make | 10 +++- test/Helmholtz/test_hfmm3d_vec.f | 3 +- test/Helmholtz/test_hfmm3d_vec.make | 10 +++- test/Laplace/test_lfmm3d_vec.f | 3 +- 30 files changed, 188 insertions(+), 91 deletions(-) create mode 100644 make.inc.macos.nag diff --git a/.gitignore b/.gitignore index 31dccfab..760e6d77 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,4 @@ lib/* *.log *.sh sbatch* +make.inc diff --git a/docs/fortran-c.rst b/docs/fortran-c.rst index e75587af..ecb0e6ba 100644 --- a/docs/fortran-c.rst +++ b/docs/fortran-c.rst @@ -355,7 +355,7 @@ and the associated 
pressure Here $x_{j}$ are the source locations, $\sigma_{j}$ are the Stokeslet densities, $\nu_{j}$ are the stresslet orientation vectors, $\mu_{j}$ -are the stresslet densities, and rhw xollwxrion of $x$ +are the stresslet densities, and the locations $x$ at which the velocity and its gradient are evaluated are referred to as the evaluation points. diff --git a/make.inc.icc b/make.inc.icc index d09add05..4e26423e 100644 --- a/make.inc.icc +++ b/make.inc.icc @@ -1,7 +1,7 @@ CC=icc CXX=icpc FC=ifort -FFLAGS= -fPIC -O3 -march=native -funroll-loops -mkl +FFLAGS= -fPIC -O3 -march=native -funroll-loops -mkl -w LIBS=-lm CLIBS = -lm -ldl -lifcore diff --git a/make.inc.macos.gnu b/make.inc.macos.gnu index 09d29211..5051a7ac 100644 --- a/make.inc.macos.gnu +++ b/make.inc.macos.gnu @@ -1,6 +1,6 @@ # makefile overrides # OS: macOS -# Compiler: gfortran X.X +# Compiler: gfortran X.X/Clang # OpenMP: enabled # @@ -22,8 +22,6 @@ OMPLIBS = -lgomp # MATLAB interface: FDIR=$$(dirname `gfortran --print-file-name libgfortran.dylib`) MFLAGS +=-L${FDIR} -MEX = $(shell ls -d /Applications/MATLAB_R20**.app)/bin/mex -#LIBS = -lm -lstdc++.6 -#MEXLIBS= -lm -lstdc++.6 -lgfortran -ldl +MEX = $(shell ls -d /Applications/MATLAB_R* | sort | tail -1)/bin/mex diff --git a/make.inc.macos.intel b/make.inc.macos.intel index b51cab39..5797c02c 100644 --- a/make.inc.macos.intel +++ b/make.inc.macos.intel @@ -7,7 +7,8 @@ CC=icc CXX=icpc FC=ifort -FFLAGS= -fPIC -O3 -march=native -funroll-loops -qmkl + +FFLAGS= -fPIC -O3 -march=native -funroll-loops -mkl -std=legacy -w LIBS= #CLIBS = -lm -ldl -lifcore CLIBS = -lm -ldl @@ -28,4 +29,9 @@ endif OMPFLAGS = -qopenmp OMPLIBS = -qopenmp +# MATLAB interface: +FDIR=$$(dirname `gfortran --print-file-name libgfortran.dylib`) +MFLAGS +=-L${FDIR} +MEX = $(shell ls -d /Applications/MATLAB_R* | sort | tail -1)/bin/mex + diff --git a/make.inc.macos.nag b/make.inc.macos.nag new file mode 100644 index 00000000..6c07c8ff --- /dev/null +++ b/make.inc.macos.nag @@ -0,0 +1,30 
@@ +# make.inc for NAG Fortran compiler +# Online documentation: https://www.nag.com/nagware/np/r71_doc/manual/compiler_2_4.html#OPTIONS + +FC=nagfor + +# The path of libraries by NAG compiler +LIB_NAG = /usr/local/lib/NAG_Fortran + +# Brief descriptions of specified options below: +# -PIC: produce position-independent code +# -O2: optimization at a normal level +# -Ounroll=1: the depth of loop-unrolling +# -f90_sign: use the Fortran 77/90 version of the SIGN intrinsic instead of the Fortran 95 one +# -dcfuns: enable recognition of non-standard double precision complex intrinsic functions. +# -dusty: allows the compilation and execution of legacy software. +# -w=x77: suppresses extension warnings for obsolete but common extensions to Fortran 77. +# -w=unreffed: suppresses warning messages about variables set but never referenced. +# -w=unused: suppresses warning messages about unused entities. +# -ieee=full: set the mode of IEEE arithmetic operation according to full mode. + +# Main compile command for NAG Fortran compiler +FFLAGS = -PIC -O2 -Ounroll=1 -f90_sign -dcfuns -dusty -w=obs -w=x77 -w=unreffed -w=unused -ieee=full + +# Flags overwritten in makefile +OMPFLAGS = -openmp +# OMPLIBS = -lf71omp64 -L$(LIB_NAG) +OMPLIBS = -lf71omp64 -lf71rts -L$(LIB_NAG) +LIBS = -lf71rts -L$(LIB_NAG) +CLIBS = -lm -ldl -L$(LIB_NAG) +FFLAGS_DYN = -PIC diff --git a/make.inc.windows.mingw b/make.inc.windows.mingw index d2c4e49c..0c58923b 100644 --- a/make.inc.windows.mingw +++ b/make.inc.windows.mingw @@ -4,7 +4,7 @@ # OpenMP: default enabled unless specified # -FFLAGS= -fPIC -O3 -funroll-loops -std=legacy +FFLAGS= -fPIC -O3 -funroll-loops -std=legacy -w DYNAMICLIB = $(LIBNAME).dll LIMPLIB = $(LIBNAME)_dll.lib diff --git a/makefile b/makefile index a7e35712..e5dafa06 100644 --- a/makefile +++ b/makefile @@ -14,7 +14,8 @@ FC=gfortran # set compiler flags for c and fortran -FFLAGS= -fPIC -O3 -march=native -funroll-loops -std=legacy +FFLAGS= -fPIC -O3 -march=native -funroll-loops 
-std=legacy -w +FFLAGS_DYN= -shared -fPIC CFLAGS= -fPIC -O3 -march=native -funroll-loops -std=c99 CXXFLAGS= -std=c++11 -DSCTL_PROFILE=-1 -fPIC -O3 -march=native -funroll-loops @@ -86,6 +87,7 @@ endif # vectorized kernel directory SRCDIR = ./vec-kernels/src INCDIR = ./vec-kernels/include +FINCDIR = ./src/Helmholtz LIBDIR = lib-static # objects to compile @@ -192,10 +194,10 @@ usage: $(CXX) -c $(CXXFLAGS) $< -o $@ %.o: %.c %.h $(CC) -c $(CFLAGS) $< -o $@ -%.o: %.f %.h - $(FC) -c $(FFLAGS) $< -o $@ +%.o: %.f + $(FC) -c $(FFLAGS) -I$(FINCDIR) $< -o $@ %.o: %.f90 - $(FC) -c $(FFLAGS) $< -o $@ + $(FC) -c $(FFLAGS) -I$(FINCDIR) $< -o $@ # build the library... lib: $(STATICLIB) $(DYNAMICLIB) @@ -224,7 +226,7 @@ $(STATICLIB): $(OBJS) ar rcs $(STATICLIB) $(OBJS) mv $(STATICLIB) lib-static/ $(DYNAMICLIB): $(OBJS) - $(FC) -shared -fPIC $(OBJS) -o $(DYNAMICLIB) $(DYLIBS) + $(FC) $(FFLAGS_DYN) $(OBJS) -o $(DYNAMICLIB) $(DYLIBS) mv $(DYNAMICLIB) lib/ [ ! -f $(LIMPLIB) ] || mv $(LIMPLIB) lib/ diff --git a/src/Common/fmmcommon.f b/src/Common/fmmcommon.f index d516ad67..ccf52b5a 100644 --- a/src/Common/fmmcommon.f +++ b/src/Common/fmmcommon.f @@ -218,8 +218,8 @@ subroutine ireorderi(ndim,n,arr,arrsort,iarr) c subroutine drescale(n,a,r) implicit none - real *8 a(n),r integer i,n + real *8 a(n),r C$OMP PARALLEL DO DEFAULT(SHARED) do i=1,n diff --git a/src/Common/tree_routs3d.f b/src/Common/tree_routs3d.f index 36bcfa45..a6cfc849 100644 --- a/src/Common/tree_routs3d.f +++ b/src/Common/tree_routs3d.f @@ -1125,11 +1125,11 @@ subroutine getlist4pwdirtest(dir,censrc,centrg,boxsize) subroutine subdividebox(pos,npts,center,boxsize, 1 isorted,iboxfl,subcenters) implicit none + integer npts double precision pos(3,npts) double precision center(3) double precision subcenters(3,8) double precision boxsize - integer npts integer isorted(*) integer iboxfl(2,8) diff --git a/src/Common/yrecursion.f b/src/Common/yrecursion.f index 1168a948..3dae2934 100644 --- a/src/Common/yrecursion.f +++ 
b/src/Common/yrecursion.f @@ -1430,7 +1430,8 @@ subroutine zylgndrbr(nmax, z, y) c branch cut at (0,+i), select the lower branch c of complex square root c - if( imag(1-z*z) .gt. 0 .and. real(1-z*z) .lt. 0) u=+sqrt(1-z*z) +c if( imag(1-z*z) .gt. 0 .and. real(1-z*z) .lt. 0) u=+sqrt(1-z*z) + if( dimag(1-z*z) .gt. 0 .and. real(1-z*z) .lt. 0) u=+sqrt(1-z*z) ccc call prin2('in zylgndrbr, u=*', -u, 2) ccc call prin2('in zylgndrbr, 1-z^2=*', 1-z*z, 2) c @@ -1498,8 +1499,10 @@ subroutine zylgndrsc(nmax, z,scale, ysc) c ztmp = 1-z*z u=-sqrt(ztmp) - if(abs(imag(z)).le.1.0d-16.and.abs(real(z)).gt.1) then - if(imag(u).lt.0) u = dconjg(u) +c if(abs(imag(z)).le.1.0d-16.and.abs(real(z)).gt.1) then +c if(imag(u).lt.0) u = dconjg(u) + if(abs(dimag(z)).le.1.0d-16.and.abs(real(z)).gt.1) then + if(dimag(u).lt.0) u = dconjg(u) endif ysc(0,0)=1 do m=0, nmax diff --git a/src/Helmholtz/hfmm3d.f b/src/Helmholtz/hfmm3d.f index 77b547cb..ec9308ae 100644 --- a/src/Helmholtz/hfmm3d.f +++ b/src/Helmholtz/hfmm3d.f @@ -562,6 +562,7 @@ subroutine hfmm3dmain(nd,eps,zk, double complex jsort(nd,0:ntj,-ntj:ntj,nexpc) + integer nboxes integer *8 iaddr(2,nboxes), lmptot double precision rmlexp(lmptot) @@ -575,7 +576,6 @@ subroutine hfmm3dmain(nd,eps,zk, integer nterms(0:nlevels) integer *8 ipointer(8),ltree integer itree(ltree) - integer nboxes double precision rscales(0:nlevels) double precision boxsize(0:nlevels) integer isrcse(2,nboxes),itargse(2,nboxes),iexpcse(2,nboxes) @@ -752,7 +752,7 @@ subroutine hfmm3dmain(nd,eps,zk, zkiupbound = 12*pi zkrupbound = 16*pi - zi = imag(zk) + zi = dimag(zk) ilevcutoff = -1 @@ -1054,7 +1054,7 @@ subroutine hfmm3dmain(nd,eps,zk, allocate(iboxlexp(nd*(nterms(ilev)+1)* 1 (2*nterms(ilev)+1),8,nthd)) zk2 = zk*boxsize(ilev) - if(real(zk2).le.zkrupbound.and.imag(zk2).lt.zkiupbound.and. + if(real(zk2).le.zkrupbound.and.dimag(zk2).lt.zkiupbound.and. 
1 ilev.gt.ilevcutoff) then c get new pw quadrature @@ -1562,7 +1562,7 @@ subroutine hfmm3dmain(nd,eps,zk, deallocate(pgboxwexp) - else if((real(zk2).gt.zkrupbound.or.imag(zk2).gt.zkiupbound). + else if((real(zk2).gt.zkrupbound.or.dimag(zk2).gt.zkiupbound). 1 and.ilev.gt.ilevcutoff) then nquad2 = nterms(ilev)*2.2 if(ifprint.ge.1) print *, "In point and shoot regime" @@ -1719,7 +1719,7 @@ subroutine hfmm3dmain(nd,eps,zk, if(ifcharge.eq.1.and.ifdipole.eq.0) then do ilev=1,nlevels zk2 = zk*boxsize(ilev) - if((real(zk2).gt.zkrupbound.or.imag(zk2).gt.zkiupbound). + if((real(zk2).gt.zkrupbound.or.dimag(zk2).gt.zkiupbound). 1 and.ilev.gt.ilevcutoff) then C$OMP PARALLEL DO DEFAULT(SHARED) @@ -1754,7 +1754,7 @@ subroutine hfmm3dmain(nd,eps,zk, if(ifcharge.eq.0.and.ifdipole.eq.1) then do ilev=1,nlevels zk2 = zk*boxsize(ilev) - if((real(zk2).gt.zkrupbound.or.imag(zk2).gt.zkiupbound). + if((real(zk2).gt.zkrupbound.or.dimag(zk2).gt.zkiupbound). 1 and.ilev.gt.ilevcutoff) then C$OMP PARALLEL DO DEFAULT(SHARED) @@ -1789,7 +1789,7 @@ subroutine hfmm3dmain(nd,eps,zk, if(ifcharge.eq.1.and.ifdipole.eq.1) then do ilev=1,nlevels zk2 = zk*boxsize(ilev) - if((real(zk2).gt.zkrupbound.or.imag(zk2).gt.zkiupbound). + if((real(zk2).gt.zkrupbound.or.dimag(zk2).gt.zkiupbound). 1 and.ilev.gt.ilevcutoff) then C$OMP PARALLEL DO DEFAULT(SHARED) diff --git a/src/Helmholtz/hfmm3d_memest.f b/src/Helmholtz/hfmm3d_memest.f index 40d95fb2..8ea7ba34 100644 --- a/src/Helmholtz/hfmm3d_memest.f +++ b/src/Helmholtz/hfmm3d_memest.f @@ -377,7 +377,7 @@ subroutine hfmm3d_memest(nd,eps,zk,nsource,source,ifcharge, zkiupbound = 12*pi zkrupbound = 16*pi - zi = imag(zkfmm) + zi = dimag(zkfmm) ilevcutoff = -1 @@ -391,7 +391,7 @@ subroutine hfmm3d_memest(nd,eps,zk,nsource,source,ifcharge, do ilev=2,nlevels zk2 = zkfmm*boxsize(ilev) - if(real(zk2).le.zkrupbound.and.imag(zk2).lt.zkiupbound.and. + if(real(zk2).le.zkrupbound.and.dimag(zk2).lt.zkiupbound.and. 
1 ilev.gt.ilevcutoff) then ier = 0 diff --git a/src/Helmholtz/hfmm3d_mps.f90 b/src/Helmholtz/hfmm3d_mps.f90 index 9946c6ee..f19847c2 100644 --- a/src/Helmholtz/hfmm3d_mps.f90 +++ b/src/Helmholtz/hfmm3d_mps.f90 @@ -393,7 +393,7 @@ subroutine hfmm3dmain_mps(nd, eps, zk, & integer :: impolesort(nmpole) ! storage stuff for tree and multipole expansions - integer :: lmptemp + integer :: lmptemp,nboxes integer *8 :: iaddr(2,nboxes), lmptot double precision :: rmlexp(lmptot) double precision :: mptemp(lmptemp) @@ -406,7 +406,6 @@ subroutine hfmm3dmain_mps(nd, eps, zk, & integer :: nterms(0:nlevels) integer *8 :: ipointer(8) integer :: itree(ltree) - integer :: nboxes integer :: mnbors,mnlist1, mnlist2,mnlist3,mnlist4 integer :: isrcse(2,nmpole) integer, allocatable :: nlist1(:),list1(:,:) @@ -819,7 +818,7 @@ subroutine hfmm3dmain_mps(nd, eps, zk, & ! load the necessary quadrature for plane waves zk2 = zk*boxsize(ilev) - if ( (real(zk2).le.16*pi) .and. (imag(zk2).le.12*pi) & + if ( (real(zk2).le.16*pi) .and. (dimag(zk2).le.12*pi) & .and. (ifmp .eq. 
0) ) then ier = 0 diff --git a/src/Helmholtz/hfmm3dwrap_legacy.f b/src/Helmholtz/hfmm3dwrap_legacy.f index a296fa48..87c7233c 100644 --- a/src/Helmholtz/hfmm3dwrap_legacy.f +++ b/src/Helmholtz/hfmm3dwrap_legacy.f @@ -195,12 +195,12 @@ subroutine hfmm3dparttarg(ier,iprec,zk,nsource,source, double complex charge(nsource),dipstr(nsource) double precision dipvec(3,nsource) + integer ntarg integer ifpot,iffld,ifpottarg,iffldtarg double complex pot(nsource),fld(3,nsource) double complex pottarg(ntarg),fldtarg(3,ntarg) integer nd,ifpgh,ifpghtarg - integer ntarg double precision targ(3,ntarg) double complex, allocatable :: dipvec_in(:,:) double complex, allocatable :: pottmp(:),gradtmp(:,:) diff --git a/src/Helmholtz/hpwrouts.f b/src/Helmholtz/hpwrouts.f index 93d17864..cd5ec7fc 100644 --- a/src/Helmholtz/hpwrouts.f +++ b/src/Helmholtz/hpwrouts.f @@ -145,33 +145,33 @@ subroutine hmkexps(rlams,nlambs,numphys,nexptotp,zk,xs,ys,zs) do 200 mth = 1,numphys(nl) u = (mth-1)*hu ncurrent = ntot+mth - zs(1,ncurrent) = cdexp(-rlams(nl) ) - zs(2,ncurrent) = cdexp(-2.0d0*rlams(nl) ) - zs(3,ncurrent) = cdexp(-3.0d0*rlams(nl) ) - zs(4,ncurrent) = cdexp(-4.0d0*rlams(nl) ) - zs(5,ncurrent) = cdexp(-5.0d0*rlams(nl) ) - xs(-1,ncurrent) = cdexp(-ima*rk*cos(u)) - xs(-2,ncurrent) = cdexp(-ima*rk*2.0d0*cos(u)) - xs(-3,ncurrent) = cdexp(-ima*rk*3.0d0*cos(u)) - xs(-4,ncurrent) = cdexp(-ima*rk*4.0d0*cos(u)) - xs(-5,ncurrent) = cdexp(-ima*rk*5.0d0*cos(u)) + zs(1,ncurrent) = exp(-rlams(nl) ) + zs(2,ncurrent) = exp(-2.0d0*rlams(nl) ) + zs(3,ncurrent) = exp(-3.0d0*rlams(nl) ) + zs(4,ncurrent) = exp(-4.0d0*rlams(nl) ) + zs(5,ncurrent) = exp(-5.0d0*rlams(nl) ) + xs(-1,ncurrent) = exp(-ima*rk*cos(u)) + xs(-2,ncurrent) = exp(-ima*rk*2.0d0*cos(u)) + xs(-3,ncurrent) = exp(-ima*rk*3.0d0*cos(u)) + xs(-4,ncurrent) = exp(-ima*rk*4.0d0*cos(u)) + xs(-5,ncurrent) = exp(-ima*rk*5.0d0*cos(u)) xs(0,ncurrent) = 1 - xs(1,ncurrent) = cdexp(ima*rk*cos(u)) - xs(2,ncurrent) = cdexp(ima*rk*2.0d0*cos(u)) - xs(3,ncurrent) = 
cdexp(ima*rk*3.0d0*cos(u)) - xs(4,ncurrent) = cdexp(ima*rk*4.0d0*cos(u)) - xs(5,ncurrent) = cdexp(ima*rk*5.0d0*cos(u)) - ys(-1,ncurrent) = cdexp(-ima*rk*dsin(u)) - ys(-2,ncurrent) = cdexp(-ima*rk*2.0d0*dsin(u)) - ys(-3,ncurrent) = cdexp(-ima*rk*3.0d0*dsin(u)) - ys(-4,ncurrent) = cdexp(-ima*rk*4.0d0*dsin(u)) - ys(-5,ncurrent) = cdexp(-ima*rk*5.0d0*dsin(u)) + xs(1,ncurrent) = exp(ima*rk*cos(u)) + xs(2,ncurrent) = exp(ima*rk*2.0d0*cos(u)) + xs(3,ncurrent) = exp(ima*rk*3.0d0*cos(u)) + xs(4,ncurrent) = exp(ima*rk*4.0d0*cos(u)) + xs(5,ncurrent) = exp(ima*rk*5.0d0*cos(u)) + ys(-1,ncurrent) = exp(-ima*rk*dsin(u)) + ys(-2,ncurrent) = exp(-ima*rk*2.0d0*dsin(u)) + ys(-3,ncurrent) = exp(-ima*rk*3.0d0*dsin(u)) + ys(-4,ncurrent) = exp(-ima*rk*4.0d0*dsin(u)) + ys(-5,ncurrent) = exp(-ima*rk*5.0d0*dsin(u)) ys(0,ncurrent) = 1 - ys(1,ncurrent) = cdexp(ima*rk*dsin(u)) - ys(2,ncurrent) = cdexp(ima*rk*2.0d0*dsin(u)) - ys(3,ncurrent) = cdexp(ima*rk*3.0d0*dsin(u)) - ys(4,ncurrent) = cdexp(ima*rk*4.0d0*dsin(u)) - ys(5,ncurrent) = cdexp(ima*rk*5.0d0*dsin(u)) + ys(1,ncurrent) = exp(ima*rk*dsin(u)) + ys(2,ncurrent) = exp(ima*rk*2.0d0*dsin(u)) + ys(3,ncurrent) = exp(ima*rk*3.0d0*dsin(u)) + ys(4,ncurrent) = exp(ima*rk*4.0d0*dsin(u)) + ys(5,ncurrent) = exp(ima*rk*5.0d0*dsin(u)) 200 continue ntot = ntot+numphys(nl) 400 continue @@ -2599,10 +2599,11 @@ subroutine hpw_ud_eval_p(nd,zk2,center,boxsize,ntarg,targ,nlam, 1 whts,nphys,nexptotp,nphmax,mexpupphys,mexpdownphys,pot) implicit none integer nd + integer ntarg,nlam real *8 center(3),boxsize,targ(3,ntarg) complex *16 rlams(nlam),pot(nd,ntarg) complex *16 whts(nlam),zk2 - integer ntarg,nlam,nphys(nlam),nexptotp,nphmax + integer nphys(nlam),nexptotp,nphmax complex *16 mexpupphys(nd,nexptotp),mexpdownphys(nd,nexptotp) complex *16 ima complex *16, allocatable :: cc(:),cc2(:) @@ -2672,10 +2673,11 @@ subroutine hpw_ns_eval_p(nd,zk2,center,boxsize,ntarg,targ,nlam, 1 rlams,whts,nphys,nexptotp,nphmax,mexpupphys,mexpdownphys,pot) implicit none integer nd + 
integer ntarg,nlam real *8 center(3),boxsize,targ(3,ntarg) complex *16 rlams(nlam),pot(nd,ntarg) complex *16 whts(nlam),zk2 - integer ntarg,nlam,nphys(nlam),nexptotp,nphmax + integer nphys(nlam),nexptotp,nphmax complex *16 mexpupphys(nd,nexptotp),mexpdownphys(nd,nexptotp) complex *16 ima complex *16, allocatable :: cc(:),cc2(:) @@ -2745,10 +2747,11 @@ subroutine hpw_ew_eval_p(nd,zk2,center,boxsize,ntarg,targ,nlam, 1 rlams,whts,nphys,nexptotp,nphmax,mexpupphys,mexpdownphys,pot) implicit none integer nd + integer ntarg,nlam real *8 center(3),boxsize,targ(3,ntarg) complex *16 rlams(nlam),pot(nd,ntarg) complex *16 whts(nlam),zk2 - integer ntarg,nlam,nphys(nlam),nexptotp,nphmax + integer nphys(nlam),nexptotp,nphmax complex *16 mexpupphys(nd,nexptotp),mexpdownphys(nd,nexptotp) complex *16 ima complex *16, allocatable :: cc(:),cc2(:) @@ -2819,10 +2822,11 @@ subroutine hpw_ud_eval_g(nd,zk2,center,boxsize,ntarg,targ,nlam, 2 grad) implicit none integer nd + integer ntarg,nlam real *8 center(3),boxsize,targ(3,ntarg) complex *16 rlams(nlam),pot(nd,ntarg) complex *16 grad(nd,3,ntarg),whts(nlam),zk2 - integer ntarg,nlam,nphys(nlam),nexptotp,nphmax + integer nphys(nlam),nexptotp,nphmax complex *16 mexpupphys(nd,nexptotp),mexpdownphys(nd,nexptotp) complex *16 ima complex *16, allocatable :: cc(:),crc(:),crs(:),cc2(:) @@ -2903,10 +2907,11 @@ subroutine hpw_ns_eval_g(nd,zk2,center,boxsize,ntarg,targ,nlam, 2 grad) implicit none integer nd + integer ntarg,nlam real *8 center(3),boxsize,targ(3,ntarg) complex *16 rlams(nlam),pot(nd,ntarg) complex *16 grad(nd,3,ntarg),whts(nlam),zk2 - integer ntarg,nlam,nphys(nlam),nexptotp,nphmax + integer nphys(nlam),nexptotp,nphmax complex *16 mexpupphys(nd,nexptotp),mexpdownphys(nd,nexptotp) complex *16 ima complex *16, allocatable :: cc(:),crc(:),crs(:),cc2(:) @@ -2985,10 +2990,11 @@ subroutine hpw_ew_eval_g(nd,zk2,center,boxsize,ntarg,targ,nlam, 2 grad) implicit none integer nd + integer ntarg,nlam real *8 center(3),boxsize,targ(3,ntarg) complex 
*16 rlams(nlam),pot(nd,ntarg) complex *16 grad(nd,3,ntarg),whts(nlam),zk2 - integer ntarg,nlam,nphys(nlam),nexptotp,nphmax + integer nphys(nlam),nexptotp,nphmax complex *16 mexpupphys(nd,nexptotp),mexpdownphys(nd,nexptotp) complex *16 ima complex *16, allocatable :: cc(:),crc(:),crs(:),cc2(:) diff --git a/src/Helmholtz/hwts3e.f b/src/Helmholtz/hwts3e.f index 1b9de37f..7e45f173 100644 --- a/src/Helmholtz/hwts3e.f +++ b/src/Helmholtz/hwts3e.f @@ -148,8 +148,8 @@ subroutine hwts3e(ier,eps,rk,cxs,cws,n) c gamma=0 v=u/(1+gamma*abs(dble(-ima*rk))) - cxs(k)=u-ima*rk+abs(imag(-ima*rk))*ima*(v/(1+v)) - cws(k)=1+abs(imag(-ima*rk))*ima*((1+v)-v)/(1+v)**2 + cxs(k)=u-ima*rk+abs(dimag(-ima*rk))*ima*(v/(1+v)) + cws(k)=1+abs(dimag(-ima*rk))*ima*((1+v)-v)/(1+v)**2 $ /(1+gamma*abs(dble(-ima*rk))) cws(k)=cws(k)*ws(k)*uweight endif @@ -275,7 +275,7 @@ subroutine hwts3dgetd(ier,rk,idomain) cy(24)=16*pi c rkrea=dble(rk) - rkima=imag(rk) + rkima=dimag(rk) c idomain = 0 ier = 0 diff --git a/src/Laplace/laprouts3d.f b/src/Laplace/laprouts3d.f index 93a8c4ae..68afedbe 100644 --- a/src/Laplace/laprouts3d.f +++ b/src/Laplace/laprouts3d.f @@ -406,7 +406,7 @@ subroutine l3dmpevalg(nd,rscale,center,mpole,nterms, pot(idim,itarg)=pot(idim,itarg)+rtmp1*rtmp2 ur(idim) = ur(idim) + rtmp4*rtmp2 utheta(idim) = utheta(idim)+rtmp5*rtmp2 - rtmp2 = 2*imag(mpole(idim,n,m)*ephi(m)) + rtmp2 = 2*dimag(mpole(idim,n,m)*ephi(m)) uphi(idim) = uphi(idim) + rtmp6*rtmp2 enddo enddo @@ -1272,7 +1272,7 @@ subroutine l3dtaevalg(nd,rscale,center,mpole,nterms, pot(idim,itarg)=pot(idim,itarg)+rtmp1*rtmp2 ur(idim) = ur(idim) + rtmp4*rtmp2 utheta(idim) = utheta(idim)+rtmp5*rtmp2 - rtmp2 = 2*imag(mpole(idim,n,m)*ephi(m)) + rtmp2 = 2*dimag(mpole(idim,n,m)*ephi(m)) uphi(idim) = uphi(idim) + rtmp6*rtmp2 enddo enddo diff --git a/src/Laplace/lfmm3d.f b/src/Laplace/lfmm3d.f index f8437ae6..2e26cca8 100644 --- a/src/Laplace/lfmm3d.f +++ b/src/Laplace/lfmm3d.f @@ -572,6 +572,7 @@ subroutine lfmm3dmain(nd,eps, double complex 
tsort(nd,0:ntj,-ntj:ntj,nexpc) double precision scjsort(nexpc) + integer nboxes integer *8 iaddr(2,nboxes), lmptot integer lmptemp double precision rmlexp(lmptot) @@ -588,7 +589,6 @@ subroutine lfmm3dmain(nd,eps, integer nterms(0:nlevels) integer *8 ipointer(8),ltree integer itree(ltree) - integer nboxes double precision rscales(0:nlevels) double precision boxsize(0:nlevels) integer isrcse(2,nboxes),itargse(2,nboxes),iexpcse(2,nboxes) diff --git a/src/Laplace/lfmm3dwrap_legacy.f b/src/Laplace/lfmm3dwrap_legacy.f index 0946dcf0..5207a887 100644 --- a/src/Laplace/lfmm3dwrap_legacy.f +++ b/src/Laplace/lfmm3dwrap_legacy.f @@ -427,7 +427,7 @@ subroutine l3dpartdirect(nsource, C$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) do i=1,ns charge_in(1,i) = real(charge(i)) - charge_in(2,i) = imag(charge(i)) + charge_in(2,i) = dimag(charge(i)) enddo C$OMP END PARALLEL DO if(ifdipole.ne.1) allocate(dipvec_in(2,3,1)) @@ -439,11 +439,11 @@ subroutine l3dpartdirect(nsource, C$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) do i=1,ns dipvec_in(1,1,i) = real(dipstr(i))*dipvec(1,i) - dipvec_in(2,1,i) = imag(dipstr(i))*dipvec(1,i) + dipvec_in(2,1,i) = dimag(dipstr(i))*dipvec(1,i) dipvec_in(1,2,i) = real(dipstr(i))*dipvec(2,i) - dipvec_in(2,2,i) = imag(dipstr(i))*dipvec(2,i) + dipvec_in(2,2,i) = dimag(dipstr(i))*dipvec(2,i) dipvec_in(1,3,i) = real(dipstr(i))*dipvec(3,i) - dipvec_in(2,3,i) = imag(dipstr(i))*dipvec(3,i) + dipvec_in(2,3,i) = dimag(dipstr(i))*dipvec(3,i) enddo C$OMP END PARALLEL DO if(ifcharge.ne.1) allocate(charge_in(2,1)) diff --git a/src/Laplace/lpwrouts.f b/src/Laplace/lpwrouts.f index ce9ae266..aa1a7c86 100644 --- a/src/Laplace/lpwrouts.f +++ b/src/Laplace/lpwrouts.f @@ -153,11 +153,11 @@ subroutine mkfexp(nlambs,numfour,numphys,fexpe,fexpo,fexpback) do j=1,nalpha alpha=(j-1)*halpha do mm = 2,numfour(i),2 - fexpe(nexte) = cdexp(ima*(mm-1)*alpha) + fexpe(nexte) = exp(ima*(mm-1)*alpha) nexte = nexte + 1 enddo do mm = 3,numfour(i),2 - fexpo(nexto) = cdexp(ima*(mm-1)*alpha) + 
fexpo(nexto) = exp(ima*(mm-1)*alpha) nexto = nexto + 1 enddo enddo @@ -170,7 +170,7 @@ subroutine mkfexp(nlambs,numfour,numphys,fexpe,fexpo,fexpback) do mm = 2,numfour(i) do j=1,nalpha alpha=(j-1)*halpha - fexpback(next) = cdexp(-ima*(mm-1)*alpha) + fexpback(next) = exp(-ima*(mm-1)*alpha) next = next + 1 enddo enddo @@ -637,7 +637,7 @@ subroutine ftophys(nd,mexpf,nlambs,rlams,numfour,numphys, enddo do mm = 2,numfour(i),2 do idim=1,nd - rtmp = 2*imag(fexpe(nexte)*mexpf(idim,nftot+mm)) + rtmp = 2*dimag(fexpe(nexte)*mexpf(idim,nftot+mm)) mexpphys(idim,nptot+ival) = mexpphys(idim,nptot+ival) + 1 dcmplx(0.0d0,rtmp) enddo @@ -2484,9 +2484,10 @@ subroutine lpw_ud_eval_p(nd,center,boxsize,ntarg,targ,nlam,rlams, 1 whts,nphys,nexptotp,nphmax,mexpupphys,mexpdownphys,pot) implicit none integer nd + integer ntarg,nlam real *8 center(3),boxsize,targ(3,ntarg),rlams(nlam),pot(nd,ntarg) real *8 whts(nlam) - integer ntarg,nlam,nphys(nlam),nexptotp,nphmax + integer nphys(nlam),nexptotp,nphmax complex *16 mexpupphys(nd,nexptotp),mexpdownphys(nd,nexptotp) complex *16 ima complex *16, allocatable :: cc(:) @@ -2551,9 +2552,10 @@ subroutine lpw_ns_eval_p(nd,center,boxsize,ntarg,targ,nlam,rlams, 1 whts,nphys,nexptotp,nphmax,mexpupphys,mexpdownphys,pot) implicit none integer nd + integer ntarg,nlam real *8 center(3),boxsize,targ(3,ntarg),rlams(nlam),pot(nd,ntarg) real *8 whts(nlam) - integer ntarg,nlam,nphys(nlam),nexptotp,nphmax + integer nphys(nlam),nexptotp,nphmax complex *16 mexpupphys(nd,nexptotp),mexpdownphys(nd,nexptotp) complex *16 ima complex *16, allocatable :: cc(:) @@ -2616,9 +2618,10 @@ subroutine lpw_ew_eval_p(nd,center,boxsize,ntarg,targ,nlam,rlams, 1 whts,nphys,nexptotp,nphmax,mexpupphys,mexpdownphys,pot) implicit none integer nd + integer ntarg,nlam real *8 center(3),boxsize,targ(3,ntarg),rlams(nlam),pot(nd,ntarg) real *8 whts(nlam) - integer ntarg,nlam,nphys(nlam),nexptotp,nphmax + integer nphys(nlam),nexptotp,nphmax complex *16 
mexpupphys(nd,nexptotp),mexpdownphys(nd,nexptotp) complex *16 ima complex *16, allocatable :: cc(:) @@ -2681,10 +2684,11 @@ subroutine lpw_ud_eval_g(nd,center,boxsize,ntarg,targ,nlam,rlams, 1 whts,nphys,nexptotp,nphmax,mexpupphys,mexpdownphys,pot,grad) implicit none integer nd + integer ntarg,nlam real *8 center(3),boxsize,targ(3,ntarg),rlams(nlam),pot(nd,ntarg) real *8 grad(nd,3,ntarg) real *8 whts(nlam) - integer ntarg,nlam,nphys(nlam),nexptotp,nphmax + integer nphys(nlam),nexptotp,nphmax complex *16 mexpupphys(nd,nexptotp),mexpdownphys(nd,nexptotp) complex *16 ima complex *16, allocatable :: cc(:),crc(:),crs(:) @@ -2762,10 +2766,11 @@ subroutine lpw_ns_eval_g(nd,center,boxsize,ntarg,targ,nlam,rlams, 1 whts,nphys,nexptotp,nphmax,mexpupphys,mexpdownphys,pot,grad) implicit none integer nd + integer ntarg,nlam real *8 center(3),boxsize,targ(3,ntarg),rlams(nlam),pot(nd,ntarg) real *8 grad(nd,3,ntarg) real *8 whts(nlam) - integer ntarg,nlam,nphys(nlam),nexptotp,nphmax + integer nphys(nlam),nexptotp,nphmax complex *16 mexpupphys(nd,nexptotp),mexpdownphys(nd,nexptotp) complex *16 ima complex *16, allocatable :: cc(:),crc(:),crs(:) @@ -2841,10 +2846,11 @@ subroutine lpw_ew_eval_g(nd,center,boxsize,ntarg,targ,nlam,rlams, 1 whts,nphys,nexptotp,nphmax,mexpupphys,mexpdownphys,pot,grad) implicit none integer nd + integer ntarg,nlam real *8 center(3),boxsize,targ(3,ntarg),rlams(nlam),pot(nd,ntarg) real *8 grad(nd,3,ntarg) real *8 whts(nlam) - integer ntarg,nlam,nphys(nlam),nexptotp,nphmax + integer nphys(nlam),nexptotp,nphmax complex *16 mexpupphys(nd,nexptotp),mexpdownphys(nd,nexptotp) complex *16 ima complex *16, allocatable :: cc(:),crc(:),crs(:) diff --git a/src/Laplace/lwtsexp_sep2.f b/src/Laplace/lwtsexp_sep2.f index 69d81e8f..5c0c8fe9 100644 --- a/src/Laplace/lwtsexp_sep2.f +++ b/src/Laplace/lwtsexp_sep2.f @@ -4530,7 +4530,8 @@ subroutine lwtsexp3sep2(n,xs,ws,err) c subroutine numthetasix(numtets,nlams) implicit none - integer numtets(nlams),nlams + integer nlams + 
integer numtets(nlams) c c This routine returns the number of Fourier modes needed in the c phi integral for each of the discrete lambda values given diff --git a/test/Helmholtz/test_helmrouts3d.make b/test/Helmholtz/test_helmrouts3d.make index a6d5799f..9acf9cf3 100644 --- a/test/Helmholtz/test_helmrouts3d.make +++ b/test/Helmholtz/test_helmrouts3d.make @@ -1,5 +1,6 @@ #HOST = gcc -HOST = gcc-openmp +#HOST = gcc-openmp +HOST = nag PROJECT = int2-helmrouts3d @@ -16,6 +17,14 @@ ifeq ($(HOST),gcc-openmp) FFLAGS=-fPIC -O3 -funroll-loops -march=native -fopenmp -std=legacy endif +ifeq ($(HOST),nag) + FC=nagfor + FFLAGS=-PIC -O3 -Ounroll=1 -f90_sign -dcfuns -dusty -w=x77 -w=unreffed -w=unused -ieee=full -openmp + OMPFLAGS= -openmp + OMPLIBS = -lf71omp64 + CLIBS = -lm -ldl +endif + # Test objects # COM = ../../src/Common diff --git a/test/Helmholtz/test_hfmm3d.make b/test/Helmholtz/test_hfmm3d.make index e0adbc61..af2ea354 100644 --- a/test/Helmholtz/test_hfmm3d.make +++ b/test/Helmholtz/test_hfmm3d.make @@ -1,5 +1,6 @@ #HOST = gcc -HOST = gcc-openmp +#HOST = gcc-openmp +HOST = nag PROJECT = int2-hfmm3d @@ -16,6 +17,13 @@ ifeq ($(HOST),gcc-openmp) FFLAGS=-fPIC -O3 -funroll-loops -march=native -fopenmp -std=legacy endif +ifeq ($(HOST),nag) + FC=nagfor + FFLAGS=-PIC -O3 -Ounroll=1 -f90_sign -dcfuns -dusty -w=x77 -w=unreffed -w=unused -ieee=full -openmp + OMPFLAGS= -openmp + OMPLIBS = -lf71omp64 + CLIBS = -lm -ldl +endif # Test objects # diff --git a/test/Helmholtz/test_hfmm3d_adjoint.make b/test/Helmholtz/test_hfmm3d_adjoint.make index 7bad9d3a..76661492 100644 --- a/test/Helmholtz/test_hfmm3d_adjoint.make +++ b/test/Helmholtz/test_hfmm3d_adjoint.make @@ -1,5 +1,6 @@ #HOST = gcc -HOST = gcc-openmp +#HOST = gcc-openmp +HOST = nag PROJECT = int2-hfmm3d-mps @@ -16,6 +17,14 @@ ifeq ($(HOST),gcc-openmp) FFLAGS=-fPIC -O3 -funroll-loops -march=native -fopenmp -std=legacy endif +ifeq ($(HOST),nag) + FC=nagfor +# FFLAGS=-PIC -O3 -Ounroll=1 -f90_sign -dcfuns -dusty -w=x77 -w=unreffed 
-w=unused -ieee=full + FFLAGS=-PIC -O3 -Ounroll=1 -f90_sign -dcfuns -dusty -w=x77 -w=unreffed -w=unused -ieee=full -openmp + OMPFLAGS= -openmp + OMPLIBS = -lf71omp64 + CLIBS = -lm -ldl +endif # Test objects # diff --git a/test/Helmholtz/test_hfmm3d_mps.f90 b/test/Helmholtz/test_hfmm3d_mps.f90 index 32b4fd17..93e65725 100644 --- a/test/Helmholtz/test_hfmm3d_mps.f90 +++ b/test/Helmholtz/test_hfmm3d_mps.f90 @@ -303,6 +303,7 @@ subroutine comperr_vec(nd,zk,ns,source,ifcharge,charge,ifdipole, & double complex zk integer ns,nt,ifcharge,ifdipole,ifpgh,ifpghtarg + integer nd double precision source(3,*),targ(3,*) double complex dipvec(nd,3,*) double complex charge(nd,*) @@ -310,7 +311,7 @@ subroutine comperr_vec(nd,zk,ns,source,ifcharge,charge,ifdipole, & double complex pot(nd,*),pottarg(nd,*),grad(nd,3,*), & gradtarg(nd,3,*) - integer i,j,ntest,nd,idim + integer i,j,ntest,idim double precision err,ra diff --git a/test/Helmholtz/test_hfmm3d_mps.make b/test/Helmholtz/test_hfmm3d_mps.make index 5d21c4db..c201b9ad 100644 --- a/test/Helmholtz/test_hfmm3d_mps.make +++ b/test/Helmholtz/test_hfmm3d_mps.make @@ -1,5 +1,6 @@ #HOST = gcc -HOST = gcc-openmp +#HOST = gcc-openmp +HOST = nag PROJECT = int2-hfmm3d-mps @@ -16,6 +17,13 @@ ifeq ($(HOST),gcc-openmp) FFLAGS=-fPIC -O3 -march=native -fopenmp -std=legacy endif +ifeq ($(HOST),nag) + FC=nagfor + FFLAGS=-PIC -O3 -Ounroll=1 -f90_sign -dcfuns -dusty -w=x77 -w=unreffed -w=unused -ieee=full -openmp + OMPFLAGS= -openmp + OMPLIBS = -lf71omp64 + CLIBS = -lm -ldl +endif # Test objects # diff --git a/test/Helmholtz/test_hfmm3d_vec.f b/test/Helmholtz/test_hfmm3d_vec.f index 6c50a4d4..e6485004 100644 --- a/test/Helmholtz/test_hfmm3d_vec.f +++ b/test/Helmholtz/test_hfmm3d_vec.f @@ -701,6 +701,7 @@ subroutine comperr_vec(nd,zk,ns,source,ifcharge,charge,ifdipole, double complex zk integer ns,nt,ifcharge,ifdipole,ifpgh,ifpghtarg + integer nd double precision source(3,*),targ(3,*) double complex dipvec(nd,3,*) double complex charge(nd,*) @@ -708,7 
+709,7 @@ subroutine comperr_vec(nd,zk,ns,source,ifcharge,charge,ifdipole, double complex pot(nd,*),pottarg(nd,*),grad(nd,3,*), 1 gradtarg(nd,3,*) - integer i,j,ntest,nd,idim + integer i,j,ntest,idim double precision err,ra diff --git a/test/Helmholtz/test_hfmm3d_vec.make b/test/Helmholtz/test_hfmm3d_vec.make index 3fd1d7f6..6620ae62 100644 --- a/test/Helmholtz/test_hfmm3d_vec.make +++ b/test/Helmholtz/test_hfmm3d_vec.make @@ -1,5 +1,6 @@ #HOST = gcc -HOST = gcc-openmp +#HOST = gcc-openmp +HOST = nag PROJECT = int2-hfmm3d-vec @@ -16,6 +17,13 @@ ifeq ($(HOST),gcc-openmp) FFLAGS=-fPIC -O3 -funroll-loops -march=native -fopenmp -std=legacy endif +ifeq ($(HOST),nag) + FC=nagfor + FFLAGS=-PIC -O3 -Ounroll=1 -f90_sign -dcfuns -dusty -w=x77 -w=unreffed -w=unused -ieee=full -openmp + OMPFLAGS= -openmp + OMPLIBS = -lf71omp64 + CLIBS = -lm -ldl +endif # Test objects # diff --git a/test/Laplace/test_lfmm3d_vec.f b/test/Laplace/test_lfmm3d_vec.f index 22089db1..f9fbdd3d 100644 --- a/test/Laplace/test_lfmm3d_vec.f +++ b/test/Laplace/test_lfmm3d_vec.f @@ -989,6 +989,7 @@ subroutine comperr_vec(nd,ns,source,ifcharge,charge,ifdipole, double complex zk integer ns,nt,ifcharge,ifdipole,ifpgh,ifpghtarg + integer nd double precision source(3,*),targ(3,*) double precision dipvec(nd,3,*) double precision charge(nd,*) @@ -997,7 +998,7 @@ subroutine comperr_vec(nd,ns,source,ifcharge,charge,ifdipole, double precision gradtarg(nd,3,*) double precision hess(nd,6,*),hesstarg(nd,6,*) - integer i,j,ntest,nd,l,idim + integer i,j,ntest,l,idim double precision err,ra