From ecdb7735dbf6e65f065a3a77603c970201a243de Mon Sep 17 00:00:00 2001 From: Semih Akkurt Date: Mon, 19 Feb 2024 11:45:28 +0000 Subject: [PATCH] Construct the waves array for all periodic BCs. --- src/cuda/poisson_fft.f90 | 5 +- src/omp/poisson_fft.f90 | 4 +- src/poisson_fft.f90 | 106 ++++++++++++++++++++++++++++++++++++--- 3 files changed, 106 insertions(+), 9 deletions(-) diff --git a/src/cuda/poisson_fft.f90 b/src/cuda/poisson_fft.f90 index 9b3adb019..600081ea8 100644 --- a/src/cuda/poisson_fft.f90 +++ b/src/cuda/poisson_fft.f90 @@ -6,6 +6,8 @@ module m_cuda_poisson_fft use m_poisson_fft, only: poisson_fft_t use m_tdsops, only: dirps_t + use m_cuda_common, only: SZ + implicit none type, extends(poisson_fft_t) :: cuda_poisson_fft_t @@ -33,9 +35,10 @@ function init(xdirps, ydirps, zdirps) result(poisson_fft) class(dirps_t), intent(in) :: xdirps, ydirps, zdirps type(cuda_poisson_fft_t) :: poisson_fft + integer :: nx, ny, nz - call poisson_fft%base_init(xdirps, ydirps, zdirps) + call poisson_fft%base_init(xdirps, ydirps, zdirps, SZ) nx = poisson_fft%nx; ny = poisson_fft%ny; nz = poisson_fft%nz diff --git a/src/omp/poisson_fft.f90 b/src/omp/poisson_fft.f90 index e561f8711..f34a1957f 100644 --- a/src/omp/poisson_fft.f90 +++ b/src/omp/poisson_fft.f90 @@ -4,6 +4,8 @@ module m_omp_poisson_fft use m_poisson_fft, only: poisson_fft_t use m_tdsops, only: dirps_t + use m_omp_common, only: SZ + implicit none type, extends(poisson_fft_t) :: omp_poisson_fft_t @@ -30,7 +32,7 @@ function init(xdirps, ydirps, zdirps) result(poisson_fft) type(omp_poisson_fft_t) :: poisson_fft integer :: nx, ny, nz - call poisson_fft%base_init(xdirps, ydirps, zdirps) + call poisson_fft%base_init(xdirps, ydirps, zdirps, SZ) end function init diff --git a/src/poisson_fft.f90 b/src/poisson_fft.f90 index 8f9e6e0ee..260c737b6 100644 --- a/src/poisson_fft.f90 +++ b/src/poisson_fft.f90 @@ -48,11 +48,12 @@ end subroutine fft_postprocess contains - subroutine base_init(self, xdirps, ydirps, zdirps) + subroutine base_init(self, xdirps, ydirps, zdirps, sz) implicit none class(poisson_fft_t) :: self class(dirps_t), intent(in) :: xdirps, ydirps, zdirps + integer, intent(in) :: sz integer :: nx, ny, nz @@ -62,25 +63,26 @@ subroutine base_init(self, xdirps, ydirps, zdirps) allocate (self%ay(self%nx), self%by(self%nx)) allocate (self%az(self%nx), self%bz(self%nx)) - allocate (self%waves(self%nx, self%ny, self%nz)) + !allocate (self%waves(self%nx, self%ny, self%nz)) + allocate (self%waves(sz, self%nx, (self%ny*(self%nz/2 + 1))/sz)) ! waves_set requires some of the preprocessed tdsops variables. - call self%waves_set(xdirps, ydirps, zdirps) + call self%waves_set(xdirps, ydirps, zdirps, sz) end subroutine base_init - subroutine waves_set(self, xdirps, ydirps, zdirps) + subroutine waves_set(self, xdirps, ydirps, zdirps, sz) implicit none class(poisson_fft_t) :: self type(dirps_t), intent(in) :: xdirps, ydirps, zdirps + integer, intent(in) :: sz complex(dp), allocatable, dimension(:) :: xkx, xk2, yky, yk2, zkz, zk2, & exs, eys, ezs integer :: nx, ny, nz - real(dp) :: w, wp, rlexs, rleys, rlezs, & - xtt, ytt, ztt, xtt1, ytt1, ztt1, xt1, yt1, zt1 + real(dp) :: w, wp, rlexs, rleys, rlezs, xtt, ytt, ztt, xt1, yt1, zt1 complex(dp) :: xt2, yt2, zt2, xyzk integer :: i, j, k, ka, kb, ix, iy, iz @@ -107,7 +109,97 @@ subroutine waves_set(self, xdirps, ydirps, zdirps) allocate(xkx(nx), xk2(nx), exs(nx)) allocate(yky(ny), yk2(ny), eys(ny)) allocate(zkz(nz), zk2(nz), ezs(nz)) - !xkx(:) = 0; xk2(:) = 0; yky(:) = 0; yk2(:) = 0; zkz(:) = 0; zk2(:) = 0 + xkx(:) = 0; xk2(:) = 0; yky(:) = 0; yk2(:) = 0; zkz(:) = 0; zk2(:) = 0 + + ! periodic-x + do i = 1, nx/2 + 1 + w = 2*pi*(i - 1)/nx + wp = xdirps%stagder_v2p%a*2*xdirps%d*sin(0.5_dp*w) & + + xdirps%stagder_v2p%b*2*xdirps%d*sin(1.5_dp*w) + wp = wp/(1._dp + 2*xdirps%stagder_v2p%alpha*cos(w)) + + xkx(i) = cmplx(1._dp, 1._dp, kind=dp)*(nx*wp/xdirps%L) + exs(i) = cmplx(1._dp, 1._dp, kind=dp)*(nx*w/xdirps%L) + xk2(i) = cmplx(1._dp, 1._dp, kind=dp)*(nx*wp/xdirps%L)**2 + end do + do i = nx/2 + 2, nx + xkx(i) = xkx(nx - i + 2) + exs(i) = exs(nx - i + 2) + xk2(i) = xk2(nx - i + 2) + end do + + ! periodic-y + do i = 1, ny/2 + 1 + w = 2*pi*(i - 1)/ny + wp = ydirps%stagder_v2p%a*2*ydirps%d*sin(0.5_dp*w) & + + ydirps%stagder_v2p%b*2*ydirps%d*sin(1.5_dp*w) + wp = wp/(1._dp + 2*ydirps%stagder_v2p%alpha*cos(w)) + + yky(i) = cmplx(1._dp, 1._dp, kind=dp)*(ny*wp/ydirps%L) + eys(i) = cmplx(1._dp, 1._dp, kind=dp)*(ny*w/ydirps%L) + yk2(i) = cmplx(1._dp, 1._dp, kind=dp)*(ny*wp/ydirps%L)**2 + end do + do i = ny/2 + 2, ny + yky(i) = yky(ny-i+2) + eys(i) = eys(ny-i+2) + yk2(i) = yk2(ny-i+2) + end do + + ! periodic-z + do i = 1, nz/2 + 1 + w = 2*pi*(i - 1)/nz + wp = zdirps%stagder_v2p%a*2*zdirps%d*sin(0.5_dp*w) & + + zdirps%stagder_v2p%b*2*zdirps%d*sin(1.5_dp*w) + wp = wp/(1._dp + 2*zdirps%stagder_v2p%alpha*cos(w)) + + zkz(i) = cmplx(1._dp, 1._dp, kind=dp)*(nz*wp/zdirps%L) + ezs(i) = cmplx(1._dp, 1._dp, kind=dp)*(nz*w/zdirps%L) + zk2(i) = cmplx(1._dp, 1._dp, kind=dp)*(nz*wp/zdirps%L)**2 + end do + + !do k = !sp%xst(3), sp%xen(3) + !do j = sp%xst(2), sp%xen(2) + ! kxyz array is in z pencil orientation, with nz=nz/2+1 !!nooo + ! kxyz array is in x pencil orientation. + !do k = 1, nx*ny/SZ + print*, 'set kxyz array' + ! TODO: do loop ranges below are valid only for single rank runs + do ka = 1, nz/2 + 1 + do kb = 1, ny/sz + do j = 1, nx + do i = 1, sz + ix = j; iy = (kb-1)*sz+i; iz = ka !+ xderps%nspz_st - 1 + rlexs = real(exs(ix), kind=dp)*xdirps%d + rleys = real(eys(iy), kind=dp)*ydirps%d + rlezs = real(ezs(iz), kind=dp)*zdirps%d + + xtt = 2*(xdirps%interpl_v2p%a*cos(rlexs*0.5_dp) & + + xdirps%interpl_v2p%b*cos(rlexs*1.5_dp) & + + xdirps%interpl_v2p%c*cos(rlexs*2.5_dp) & + + xdirps%interpl_v2p%d*cos(rlexs*3.5_dp)) + ytt = 2*(ydirps%interpl_v2p%a*cos(rleys*0.5_dp) & + + ydirps%interpl_v2p%b*cos(rleys*1.5_dp) & + + ydirps%interpl_v2p%c*cos(rleys*2.5_dp) & + + ydirps%interpl_v2p%d*cos(rleys*3.5_dp)) + ztt = 2*(zdirps%interpl_v2p%a*cos(rlezs*0.5_dp) & + + zdirps%interpl_v2p%b*cos(rlezs*1.5_dp) & + + zdirps%interpl_v2p%c*cos(rlezs*2.5_dp) & + + zdirps%interpl_v2p%d*cos(rlezs*3.5_dp)) + + xt1 = 1._dp + 2*xdirps%interpl_v2p%alpha*cos(rlexs) + yt1 = 1._dp + 2*ydirps%interpl_v2p%alpha*cos(rleys) + zt1 = 1._dp + 2*zdirps%interpl_v2p%alpha*cos(rlezs) + + xt2 = xk2(ix)*(((ytt/yt1)*(ztt/zt1))**2) + yt2 = yk2(iy)*(((xtt/xt1)*(ztt/zt1))**2) + zt2 = zk2(iz)*(((xtt/xt1)*(ytt/yt1))**2) + + xyzk = xt2 + yt2 + zt2 + self%waves(i, j, ka + (kb - 1)*(nz/2 + 1)) = xyzk + end do + end do + end do + end do end subroutine waves_set