Skip to content

Commit

Permalink
Add option to not do in-place FFT
Browse files Browse the repository at this point in the history
The out-of-place path is currently enabled for hipFFT only; cuFFT still transforms in place.

In-place FFTs seem to be an issue for ROCm at the moment. This is a
temporary workaround.
  • Loading branch information
samhatfield committed Sep 20, 2024
1 parent f6b40d6 commit 3c6810a
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 15 deletions.
7 changes: 7 additions & 0 deletions src/trans/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,15 @@ if( HAVE_HIP )
algor/hicblas_gemm.hip.cpp
algor/hicfft.hip.cpp
)
ecbuild_info("warn: IN_PLACE_FFT not defined for hipFFT")
elseif( HAVE_CUDA )
set( GPU_RUNTIME "CUDA" )
set( ECTRANS_GPU_HIP_LIBRARIES CUDA::cufft CUDA::cublas nvhpcwrapnvtx CUDA::cudart )
list( APPEND trans_gpu_common_src
algor/hicblas_gemm.cuda.cu
algor/hicfft.cuda.cu
)
ecbuild_info("warn: IN_PLACE_FFT defined for cuFFT")
else()
ecbuild_info("warn: HIP and CUDA not found")
endif()
Expand Down Expand Up @@ -166,6 +168,11 @@ foreach( prec dp sp )
target_compile_definitions( ectrans_gpu_${prec} PRIVATE TRANS_SINGLE PARKINDTRANS_SINGLE )
endif()

# cuFFT can do in-place FFT, hipFFT cannot.
# The backend selection earlier in this file gives HIP precedence over CUDA
# (if( HAVE_HIP ) ... elseif( HAVE_CUDA )), so mirror that here: only define
# IN_PLACE_FFT when cuFFT is the FFT library actually being built against.
if( HAVE_CUDA AND NOT HAVE_HIP )
  target_compile_definitions( ectrans_gpu_${prec} PRIVATE IN_PLACE_FFT )
endif()

if( HAVE_OMP AND CMAKE_Fortran_COMPILER_ID MATCHES Cray )
# Propagate flags as link options for downstream targets. Only required for Cray
target_link_options( ectrans_gpu_${prec} INTERFACE
Expand Down
2 changes: 1 addition & 1 deletion src/trans/gpu/internal/dir_trans_ctl_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ SUBROUTINE DIR_TRANS_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_UV,KF_SCALARS,&
ALLOCATOR = MAKE_BUFFERED_ALLOCATOR()
HTRGTOL = PREPARE_TRGTOL(ALLOCATOR,KF_GP,KF_FS)
IF (KF_FS > 0) THEN
HFTDIR = PREPARE_FTDIR()
HFTDIR = PREPARE_FTDIR(ALLOCATOR,KF_FS)
HTRLTOM_PACK = PREPARE_TRLTOM_PACK(ALLOCATOR, KF_FS)
HTRLTOM = PREPARE_TRLTOM(ALLOCATOR, KF_FS)
HTRLTOM_UNPACK = PREPARE_TRLTOM_UNPACK(ALLOCATOR, KF_FS)
Expand Down
30 changes: 26 additions & 4 deletions src/trans/gpu/internal/ftdir_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,35 @@
!

MODULE FTDIR_MOD
USE BUFFERED_ALLOCATOR_MOD ,ONLY : ALLOCATION_RESERVATION_HANDLE
IMPLICIT NONE

PRIVATE
PUBLIC :: FTDIR, FTDIR_HANDLE, PREPARE_FTDIR

TYPE FTDIR_HANDLE
TYPE(ALLOCATION_RESERVATION_HANDLE) :: HREEL_COMPLEX
END TYPE
CONTAINS

FUNCTION PREPARE_FTDIR() RESULT(HFTDIR)
FUNCTION PREPARE_FTDIR(ALLOCATOR,KF_FS) RESULT(HFTDIR)
  ! Reserve the buffer that FTDIR uses for PREEL_COMPLEX when the direct FFT
  ! is not performed in place.
  !
  ! ALLOCATOR  buffered allocator with which the reservation is registered
  ! KF_FS      field count used to scale the reservation size
  ! HFTDIR     returned handle; its HREEL_COMPLEX component records the
  !            reservation
  !
  ! When IN_PLACE_FFT is defined nothing is reserved and HFTDIR is returned
  ! without being assigned to.
  USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT
  USE TPM_DISTR, ONLY: D
  USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE
  USE ISO_C_BINDING, ONLY: C_SIZE_T

  IMPLICIT NONE

  TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR
  INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS
  TYPE(FTDIR_HANDLE) :: HFTDIR

  ! Never assigned; only passed to SIZEOF to obtain the byte size of one
  ! REAL(KIND=JPRBT) value
  REAL(KIND=JPRBT) :: DUMMY

#ifndef IN_PLACE_FFT
  ! Size in bytes of KF_FS*D%NLENGTF reals of kind JPRBT. Each factor is
  ! widened to C_SIZE_T before multiplying; otherwise the product is evaluated
  ! in the integer kinds of KF_FS and D%NLENGTF and can overflow for large
  ! problem sizes.
  HFTDIR%HREEL_COMPLEX = RESERVE(ALLOCATOR, &
    & INT(KF_FS,KIND=C_SIZE_T)*INT(D%NLENGTF,KIND=C_SIZE_T)*INT(SIZEOF(DUMMY),KIND=C_SIZE_T))
#endif
END FUNCTION PREPARE_FTDIR

SUBROUTINE FTDIR(ALLOCATOR,HFTDIR,PREEL_REAL,PREEL_COMPLEX,KFIELD)
!**** *FTDIR - Direct Fourier transform
Expand Down Expand Up @@ -60,12 +76,13 @@ SUBROUTINE FTDIR(ALLOCATOR,HFTDIR,PREEL_REAL,PREEL_COMPLEX,KFIELD)
USE TPM_GEN, ONLY: LSYNC_TRANS
USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT
USE TPM_DISTR, ONLY: MYSETW, MYPROC, NPROC, D_NSTAGT0B, D_NSTAGTF,D_NPTRLS, &
& D_NPNTGTB0, D_NPROCM, D_NDGL_FS
& D_NPNTGTB0, D_NPROCM, D_NDGL_FS, D
USE TPM_GEOMETRY, ONLY: G_NMEN, G_NLOEN
USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR
USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION
USE TPM_HICFFT, ONLY: EXECUTE_DIR_FFT
USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM
USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX
USE ISO_C_BINDING, ONLY: C_SIZE_T

IMPLICIT NONE

Expand All @@ -77,7 +94,12 @@ SUBROUTINE FTDIR(ALLOCATOR,HFTDIR,PREEL_REAL,PREEL_COMPLEX,KFIELD)

INTEGER(KIND=JPIM) :: KGL

#ifdef IN_PLACE_FFT
PREEL_COMPLEX => PREEL_REAL
#else
CALL ASSIGN_PTR(PREEL_COMPLEX, GET_ALLOCATION(ALLOCATOR, HFTDIR%HREEL_COMPLEX),&
& 1_C_SIZE_T, int(KFIELD*D%NLENGTF*SIZEOF(PREEL_COMPLEX(1)),kind=c_size_t))
#endif

#ifdef ACCGPU
!$ACC DATA PRESENT(PREEL_REAL, PREEL_COMPLEX, &
Expand Down
38 changes: 29 additions & 9 deletions src/trans/gpu/internal/ftinv_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,33 @@
!

MODULE FTINV_MOD
USE BUFFERED_ALLOCATOR_MOD ,ONLY : BUFFERED_ALLOCATOR
USE BUFFERED_ALLOCATOR_MOD ,ONLY : BUFFERED_ALLOCATOR, ALLOCATION_RESERVATION_HANDLE
IMPLICIT NONE

PRIVATE
PUBLIC :: FTINV, FTINV_HANDLE, PREPARE_FTINV

TYPE FTINV_HANDLE
TYPE(ALLOCATION_RESERVATION_HANDLE) :: HREEL_REAL
END TYPE
CONTAINS
FUNCTION PREPARE_FTINV(ALLOCATOR) RESULT(HFTINV)
FUNCTION PREPARE_FTINV(ALLOCATOR,KF_FS) RESULT(HFTINV)
  ! Reserve the buffer that FTINV uses for PREEL_REAL when the inverse FFT is
  ! not performed in place.
  !
  ! ALLOCATOR  buffered allocator with which the reservation is registered
  ! KF_FS      field count used to scale the reservation size
  ! HFTINV     returned handle; its HREEL_REAL component records the
  !            reservation
  !
  ! When IN_PLACE_FFT is defined nothing is reserved and HFTINV is returned
  ! without being assigned to.
  USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT
  USE TPM_DISTR, ONLY: D
  USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE
  USE ISO_C_BINDING, ONLY: C_SIZE_T

  IMPLICIT NONE

  TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR
  INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS
  TYPE(FTINV_HANDLE) :: HFTINV

  ! Never assigned; only passed to SIZEOF to obtain the byte size of one
  ! REAL(KIND=JPRBT) value
  REAL(KIND=JPRBT) :: DUMMY

#ifndef IN_PLACE_FFT
  ! Size in bytes of D%NLENGTF*KF_FS reals of kind JPRBT. Each factor is
  ! widened to C_SIZE_T before multiplying; otherwise the product is evaluated
  ! in the integer kinds of D%NLENGTF and KF_FS and can overflow for large
  ! problem sizes.
  HFTINV%HREEL_REAL = RESERVE(ALLOCATOR, &
    & INT(D%NLENGTF,KIND=C_SIZE_T)*INT(KF_FS,KIND=C_SIZE_T)*INT(SIZEOF(DUMMY),KIND=C_SIZE_T))
#endif
END FUNCTION PREPARE_FTINV

SUBROUTINE FTINV(ALLOCATOR,HFTINV,PREEL_COMPLEX,PREEL_REAL,KFIELD)
Expand Down Expand Up @@ -59,13 +72,15 @@ SUBROUTINE FTINV(ALLOCATOR,HFTINV,PREEL_COMPLEX,PREEL_REAL,KFIELD)
! G. Mozdzynski (Jun 2015): Support alternative FFTs to FFTW
! ------------------------------------------------------------------

USE TPM_GEN, ONLY: LSYNC_TRANS
USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT
USE TPM_DISTR, ONLY: MYSETW, D_NPTRLS, D_NDGL_FS, D_NSTAGTF
USE TPM_GEOMETRY, ONLY: G_NLOEN
USE TPM_HICFFT, ONLY: EXECUTE_INV_FFT
USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM
USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX
USE TPM_GEN, ONLY: LSYNC_TRANS
USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT
USE TPM_DISTR, ONLY: MYSETW, D_NPTRLS, D_NDGL_FS, D_NSTAGTF, D
USE TPM_GEOMETRY, ONLY: G_NLOEN
USE TPM_HICFFT, ONLY: EXECUTE_INV_FFT
USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM
USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX
USE BUFFERED_ALLOCATOR_MOD, ONLY: ASSIGN_PTR, GET_ALLOCATION
USE ISO_C_BINDING, ONLY: C_SIZE_T

IMPLICIT NONE

Expand All @@ -77,7 +92,12 @@ SUBROUTINE FTINV(ALLOCATOR,HFTINV,PREEL_COMPLEX,PREEL_REAL,KFIELD)

INTEGER(KIND=JPIM) :: KGL

#ifdef IN_PLACE_FFT
PREEL_REAL => PREEL_COMPLEX
#else
CALL ASSIGN_PTR(PREEL_REAL, GET_ALLOCATION(ALLOCATOR, HFTINV%HREEL_REAL),&
& 1_C_SIZE_T, int(KFIELD*D%NLENGTF*SIZEOF(PREEL_REAL(1)),kind=c_size_t))
#endif

#ifdef OMPGPU
#endif
Expand Down
2 changes: 1 addition & 1 deletion src/trans/gpu/internal/inv_trans_ctl_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ SUBROUTINE INV_TRANS_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_OUT_LT,&
HTRMTOL = PREPARE_TRMTOL(ALLOCATOR,IF_LEG)
HTRMTOL_UNPACK = PREPARE_TRMTOL_UNPACK(ALLOCATOR,IF_FOURIER)
HFSC = PREPARE_FSC(ALLOCATOR)
HFTINV = PREPARE_FTINV(ALLOCATOR)
HFTINV = PREPARE_FTINV(ALLOCATOR,IF_FOURIER)
ENDIF
HTRLTOG = PREPARE_TRLTOG(ALLOCATOR,IF_FOURIER,KF_GP)

Expand Down

0 comments on commit 3c6810a

Please sign in to comment.