fixing bug in laplace translation operators for mps

flatironinstitute · Jul 20, 2023 · b83b6ec · b83b6ec
1 parent e924041
commit b83b6ec
Show file tree

Hide file tree

Showing 7 changed files with 96 additions and 39 deletions.
diff --git a/examples/hfmm3d_vec_example.make b/examples/hfmm3d_vec_example.make
@@ -37,7 +37,7 @@ endif
 
 ifeq ($(HOST),gcc-openmp)
     FC = gfortran -L${LDFMM}
-    FFLAGS=-fPIC -O3 -funroll-loops -march=native -fopenmp 
+    FFLAGS=-fPIC -O3 -funroll-loops -march=native -fopenmp -std=legacy 
 endif
 
 ifeq ($(HOST),intel)

diff --git a/make.inc.macos.gnu b/make.inc.macos.gnu
@@ -1,12 +1,12 @@
 # makefile overrides
 # OS:       macOS
-# Compiler: gfortran 9.X
+# Compiler: gfortran X.X
 # OpenMP:   enabled
 #
 
-CC=gcc-9
-CXX=g++-9
-FC=gfortran-9
+CC=gcc
+CXX=g++
+FC=gfortran
 
 ifeq ($(PREFIX),)
     FMM_INSTALL_DIR=/usr/local/lib
@@ -20,8 +20,9 @@ OMPFLAGS = -fopenmp
 OMPLIBS = -lgomp
 
 # MATLAB interface:
-MFLAGS += -L/usr/local/lib/gcc/9
-MEX = $(shell ls -d /Applications/MATLAB_R201*.app)/bin/mex
+FDIR=$$(dirname `gfortran --print-file-name libgfortran.dylib`)
+MFLAGS +=-L${FDIR}
+MEX = $(shell ls -d /Applications/MATLAB_R20**.app)/bin/mex
 #LIBS = -lm -lstdc++.6
 #MEXLIBS= -lm -lstdc++.6 -lgfortran -ldl
 

diff --git a/matlab/fmm3d.c b/matlab/fmm3d.c
@@ -1112,6 +1112,31 @@ mxWrapReturnZDef_single   (mxWrapReturn_single_dcomplex, dcomplex,
 #define MWF77_stfmm3d stfmm3d_
 #define MWF77_st3ddirectstokg st3ddirectstokg_
 #define MWF77_st3ddirectstokstrsg st3ddirectstokstrsg_
+#elif defined(MWF77_UNDERSCORE0)
+#define MWF77_hndiv hndiv
+#define MWF77_hfmm3d_ndiv hfmm3d_ndiv
+#define MWF77_h3ddirectcp h3ddirectcp
+#define MWF77_h3ddirectdp h3ddirectdp
+#define MWF77_h3ddirectcdp h3ddirectcdp
+#define MWF77_h3ddirectcg h3ddirectcg
+#define MWF77_h3ddirectdg h3ddirectdg
+#define MWF77_h3ddirectcdg h3ddirectcdg
+#define MWF77_lndiv lndiv
+#define MWF77_lfmm3d_ndiv lfmm3d_ndiv
+#define MWF77_l3ddirectcp l3ddirectcp
+#define MWF77_l3ddirectdp l3ddirectdp
+#define MWF77_l3ddirectcdp l3ddirectcdp
+#define MWF77_l3ddirectcg l3ddirectcg
+#define MWF77_l3ddirectdg l3ddirectdg
+#define MWF77_l3ddirectcdg l3ddirectcdg
+#define MWF77_l3ddirectch l3ddirectch
+#define MWF77_l3ddirectdh l3ddirectdh
+#define MWF77_l3ddirectcdh l3ddirectcdh
+#define MWF77_emfmm3d emfmm3d
+#define MWF77_em3ddirect em3ddirect
+#define MWF77_stfmm3d stfmm3d
+#define MWF77_st3ddirectstokg st3ddirectstokg
+#define MWF77_st3ddirectstokstrsg st3ddirectstokstrsg
 #else /* f2c convention */
 #define MWF77_hndiv hndiv_
 #define MWF77_hfmm3d_ndiv hfmm3d_ndiv__

diff --git a/matlab/fmm3d_legacy.c b/matlab/fmm3d_legacy.c
@@ -1076,6 +1076,13 @@ mxWrapReturnZDef_single   (mxWrapReturn_single_dcomplex, dcomplex,
 #define MWF77_lfmm3dpartself lfmm3dpartself_
 #define MWF77_lfmm3dparttarg lfmm3dparttarg_
 #define MWF77_l3dpartdirect l3dpartdirect_
+#elif defined(MWF77_UNDERSCORE0)
+#define MWF77_hfmm3dpartself hfmm3dpartself
+#define MWF77_hfmm3dparttarg hfmm3dparttarg
+#define MWF77_h3dpartdirect h3dpartdirect
+#define MWF77_lfmm3dpartself lfmm3dpartself
+#define MWF77_lfmm3dparttarg lfmm3dparttarg
+#define MWF77_l3dpartdirect l3dpartdirect
 #else /* f2c convention */
 #define MWF77_hfmm3dpartself hfmm3dpartself_
 #define MWF77_hfmm3dparttarg hfmm3dparttarg_

diff --git a/src/Helmholtz/helmrouts3d.f b/src/Helmholtz/helmrouts3d.f
@@ -196,14 +196,14 @@ subroutine h3dmpevalp(nd,zk,rscale,center,mpole,nterms,ztarg,
         do n=1,nterms
           ztmp1 = fhs(n)*ynm(n,0)
           do idim=1,nd
-   	        pot(idim,itarg)=pot(idim,itarg)+mpole(idim,n,0)*ztmp1
+            pot(idim,itarg)=pot(idim,itarg)+mpole(idim,n,0)*ztmp1
           enddo
           do m=1,n
-	        ztmp1=fhs(n)*ynm(n,m)
+            ztmp1=fhs(n)*ynm(n,m)
             do idim=1,nd
-	          ztmp2 = mpole(idim,n,m)*ephi(m) 
-	          ztmp3 = mpole(idim,n,-m)*ephi(-m)
-	          ztmpsum = ztmp2+ztmp3
+              ztmp2 = mpole(idim,n,m)*ephi(m) 
+              ztmp3 = mpole(idim,n,-m)*ephi(-m)
+              ztmpsum = ztmp2+ztmp3
               pot(idim,itarg)=pot(idim,itarg)+ztmp1*ztmpsum
             enddo
           enddo
@@ -372,24 +372,24 @@ subroutine h3dmpevalg(nd,zk,rscale,center,mpole,nterms,ztarg,
           ztmp3 = -fhs(n)*ynmd(n,0)*stheta
           do idim=1,nd
             pot(idim,itarg)=pot(idim,itarg)+mpole(idim,n,0)*ztmp1
-	        ur(idim) = ur(idim) + mpole(idim,n,0)*ztmp2
+            ur(idim) = ur(idim) + mpole(idim,n,0)*ztmp2
             utheta(idim) = utheta(idim) + mpole(idim,n,0)*ztmp3
           enddo
           do m=1,n
-	        ztmp1=fhs(n)*ynm(n,m)*stheta
+            ztmp1=fhs(n)*ynm(n,m)*stheta
             ztmp4 = fhder(n)*ynm(n,m)*stheta
             ztmp5 = -fhs(n)*ynmd(n,m)
             ztmp6 = eye*m*fhs(n)*ynm(n,m)
 
             do idim=1,nd
               ztmp2 = mpole(idim,n,m)*ephi(m) 
-	          ztmp3 = mpole(idim,n,-m)*ephi(-m)
-	          ztmpsum = ztmp2+ztmp3
-	          pot(idim,itarg)=pot(idim,itarg)+ztmp1*ztmpsum
+              ztmp3 = mpole(idim,n,-m)*ephi(-m)
+              ztmpsum = ztmp2+ztmp3
+              pot(idim,itarg)=pot(idim,itarg)+ztmp1*ztmpsum
               ur(idim) = ur(idim) + ztmp4*ztmpsum
-	          utheta(idim) = utheta(idim) + ztmp5*ztmpsum
-	          ztmpsum = ztmp2 - ztmp3
-	          uphi(idim) = uphi(idim) + ztmp6*ztmpsum
+              utheta(idim) = utheta(idim) + ztmp5*ztmpsum
+              ztmpsum = ztmp2 - ztmp3
+              uphi(idim) = uphi(idim) + ztmp6*ztmpsum
             enddo
           enddo
         enddo

diff --git a/src/Laplace/l3dtrans.f b/src/Laplace/l3dtrans.f
@@ -122,10 +122,10 @@ subroutine l3dmpmp(nd,sc1,x0y0z0,mpole,nterms,sc2,
 
       if(nterms2.ge.30) then
         call rotviaproj(nd,-theta,nterms2,nterms2,nterms2,marray1,
-     1        nterms2,marray,ldc)
+     1        ldc,marray,ldc)
       else
         call rotviarecur(nd,-theta,nterms2,nterms2,nterms2,marray1,
-     1        nterms2,marray,ldc)
+     1        ldc,marray,ldc)
       endif
 c
 c
@@ -330,7 +330,7 @@ subroutine l3dmploc(nd,sc1,x0y0z0,mpole,nterms,
 c
       rshift = d
       call l3dmploczshift(nd,marray,sc1,ldc,nterms,marray1,
-     1      sc2,nterms2,nterms2,rshift,dc,lca)
+     1      sc2,ldc,nterms2,rshift,dc,lca)
 
 c
 c     reverse THETA rotation. 
@@ -556,7 +556,7 @@ subroutine l3dlocloc(nd,sc1,x0y0z0,locold,nterms,sc2,
 c
       rshift = d
        call l3dlocloczshift(nd,sc1,marray,ldc,nterms,sc2,marray1,
-     1           nterms2,nterms2,rshift,dc,lda)
+     1           ldc,nterms2,rshift,dc,lda)
 c
 c     reverse THETA rotation. 
 c     I.e. rotation of -THETA radians about the Yprime axis.

diff --git a/test/Laplace/test_laprouts3d.f b/test/Laplace/test_laprouts3d.f
@@ -38,9 +38,11 @@
       complex *16, allocatable :: mpole1(:,:,:),mpole2(:,:,:)
       complex *16, allocatable :: locexp1(:,:,:),locexp2(:,:,:)
       real *8, allocatable :: charge(:,:),dipvec(:,:,:)
-      real *8 wlege(100000),errs(3,5),err_exp(5)
-      real *8 scarray_loc(100000)
-      real *8 scarray_mp(100000)
+      real *8  errs(3,5),err_exp(5)
+      real *8, allocatable :: scarray_loc(:),scarray_mp(:)
+      real *8, allocatable :: scarray_loc2(:),scarray_mp2(:)
+      real *8, allocatable :: scarray_loc3(:),scarray_mp3(:)
+      real *8, allocatable :: wlege(:)
       integer ipass(5)
       complex *16 eye
 c
@@ -52,6 +54,13 @@
       write(*,*) "=========================================="
       write(*,*) "Testing suite for laprouts3d"
 
+      lw7 = 100000
+      ll = lw7
+      allocate(wlege(ll))
+      allocate(scarray_mp(ll),scarray_loc(ll))
+      allocate(scarray_mp2(ll),scarray_loc2(ll))
+      allocate(scarray_mp3(ll),scarray_loc3(ll))
+
       open(unit=33,file='print_testres.txt',access='append')
 
 c
@@ -164,18 +173,31 @@
       call l3dterms(eps, nterms2)
       call l3dterms(eps, nterms3)
 
+      nterms2 = nterms + 5
+      nterms3 = nterms + 10
+
+      nmax = max(nterms,nterms2)
+      nmax = max(nmax,nterms3) 
+
+      print *, "nmax=",nmax
+
 
       rconv1 = 1.0d0/sqrt(3.0d0)
       rconv2 = sqrt(3.0d0)/2.0d0
       rconv3 = 0.75d0
 
 
       nlege = 100
-      lw7 = 100000
       call ylgndrfwini(nlege,wlege,lw7,lused7)
+
       call l3dmpevalhessdini(nterms,scarray_mp)
       call l3dtaevalhessdini(nterms,scarray_loc)
 
+      call l3dmpevalhessdini(nterms2,scarray_mp2)
+      call l3dtaevalhessdini(nterms2,scarray_loc2)
+
+      call l3dmpevalhessdini(nterms3,scarray_mp3)
+      call l3dtaevalhessdini(nterms3,scarray_loc3)
 c
 c
 c       create multipole expansion:
@@ -245,9 +267,10 @@
         enddo
       enddo
 
-      call l3dmpevalh(nd,rscale2,c1,mpole2,nterms,ztrgs,nt,pots,
-     1      flds,hesss,thresh,scarray_mp)
-      err_exp(2) = 10*max(rconv2**(nterms),eps)
+      call l3dmpevalh(nd,rscale2,c1,mpole2,nterms2,ztrgs,nt,pots,
+     1      flds,hesss,thresh,scarray_mp2)
+      nn2 = min(nterms,nterms2)
+      err_exp(2) = 10*max(rconv2**(nn2),eps)
       write(*,'(a,e11.4)') 'Testing mpmp, expected error='
      1   ,err_exp(2)
       call errprinth(nd,nt,pots,opots,flds,oflds,hesss,ohesss,
@@ -282,9 +305,10 @@
       enddo
 
       call l3dtaevalh(nd,rscale2,c2,locexp2,nterms2,ztrgs,nt,
-     1      pots,flds,hesss,scarray_loc)
+     1      pots,flds,hesss,scarray_loc2)
+      nn2 = min(nterms,nterms2)
 
-      err_exp(3) = 10*max(rconv3**(nterms),eps)
+      err_exp(3) = 10*max(rconv3**(nn2),eps)
       write(*,'(a,e11.4)') 'Testing mploc, expected error='
      1   ,err_exp(3)
       call errprinth(nd,nt,pots,opots,flds,oflds,hesss,ohesss,
@@ -320,10 +344,10 @@
       enddo
 
       call l3dtaevalh(nd,rscale1,c3,locexp1,nterms3,ztrgs,nt,
-     1      pots,flds,hesss,scarray_loc)
+     1      pots,flds,hesss,scarray_loc3)
 
-
-      err_exp(4) = 10*max(rconv3**(nterms),eps)
+      nn2 = min(nterms,nterms3)
+      err_exp(4) = 10*max(rconv3**(nn2),eps)
       write(*,'(a,e11.4)') 'Testing locloc, expected error='
      1   ,err_exp(4)
 
@@ -360,9 +384,9 @@
       enddo
 
       call l3dtaevalh(nd,rscale1,c3,locexp1,nterms3,ztrgs,nt,
-     1      pots,flds,hesss,scarray_loc)
-
-      err_exp(5) = 10*max(rconv3**(nterms),eps)
+     1      pots,flds,hesss,scarray_loc3)
+      nn = min(nterms,nterms3)
+      err_exp(5) = 10*max(rconv3**(nn),eps)
       write(*,'(a,e11.4)') 'Testing formta and taeval, expected error='
      1   ,err_exp(5)
       call errprinth(nd,nt,pots,opots,flds,oflds,hesss,ohesss,