From 265a66c09b91285f12dbec0272dfc6860657080a Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Wed, 16 Oct 2024 12:59:11 +0000 Subject: [PATCH] Switch from STORAGE_SIZE to C_SIZEOF --- .../gpu/algor/buffered_allocator_mod.F90 | 8 ++--- src/trans/gpu/algor/ext_acc.F90 | 8 ++--- src/trans/gpu/internal/ftdir_mod.F90 | 8 ++--- src/trans/gpu/internal/ftinv_mod.F90 | 8 ++--- src/trans/gpu/internal/ltdir_mod.F90 | 20 ++++++------ src/trans/gpu/internal/ltinv_mod.F90 | 32 +++++++++---------- src/trans/gpu/internal/trgtol_mod.F90 | 18 +++++------ src/trans/gpu/internal/trltog_mod.F90 | 14 ++++---- src/trans/gpu/internal/trltom_mod.F90 | 8 ++--- src/trans/gpu/internal/trltom_pack_unpack.F90 | 28 ++++++++-------- src/trans/gpu/internal/trmtol_mod.F90 | 8 ++--- src/trans/gpu/internal/trmtol_pack_unpack.F90 | 16 +++++----- 12 files changed, 88 insertions(+), 88 deletions(-) diff --git a/src/trans/gpu/algor/buffered_allocator_mod.F90 b/src/trans/gpu/algor/buffered_allocator_mod.F90 index 1fcd3873..34b9c42a 100644 --- a/src/trans/gpu/algor/buffered_allocator_mod.F90 +++ b/src/trans/gpu/algor/buffered_allocator_mod.F90 @@ -118,7 +118,7 @@ FUNCTION GET_ALLOCATION(ALLOCATOR, RESERVATION) END FUNCTION GET_ALLOCATION SUBROUTINE ASSIGN_PTR_FLOAT(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALUE, SET_STREAM) - USE ISO_C_BINDING, ONLY: C_FLOAT + USE ISO_C_BINDING, ONLY: C_FLOAT, C_F_POINTER, C_SIZEOF IMPLICIT NONE INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: SRC(:) REAL(KIND=C_FLOAT), POINTER, INTENT(OUT) :: DST(:) @@ -148,10 +148,10 @@ SUBROUTINE ASSIGN_PTR_FLOAT(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALUE !$ACC END KERNELS!! LOOP ENDIF CALL C_F_POINTER(C_LOC(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1)), DST, & - & [STORAGE_SIZE(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1))/STORAGE_SIZE(DST(0))]) + & [C_SIZEOF(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1))/C_SIZEOF(DST(0))]) END SUBROUTINE ASSIGN_PTR_FLOAT SUBROUTINE ASSIGN_PTR_DOUBLE(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALUE, SET_STREAM) - USE ISO_C_BINDING, ONLY: C_DOUBLE + USE ISO_C_BINDING, ONLY: C_DOUBLE, C_F_POINTER, C_SIZEOF IMPLICIT NONE INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: SRC(:) REAL(KIND=C_DOUBLE), POINTER, INTENT(OUT) :: DST(:) @@ -181,6 +181,6 @@ SUBROUTINE ASSIGN_PTR_DOUBLE(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALU !$ACC END KERNELS!! LOOP ENDIF CALL C_F_POINTER(C_LOC(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1)), DST, & - & [STORAGE_SIZE(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1))/STORAGE_SIZE(DST(0))]) + & [C_SIZEOF(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1))/C_SIZEOF(DST(0))]) END SUBROUTINE ASSIGN_PTR_DOUBLE END MODULE diff --git a/src/trans/gpu/algor/ext_acc.F90 b/src/trans/gpu/algor/ext_acc.F90 index e81a164d..076566b0 100644 --- a/src/trans/gpu/algor/ext_acc.F90 +++ b/src/trans/gpu/algor/ext_acc.F90 @@ -268,7 +268,7 @@ subroutine ext_acc_create(ptrs, stream) num_ranges = get_common_pointers(ptrs, common_ptrs) do i = 1, num_ranges - call c_f_pointer(common_ptrs(i)%ptr, pp, shape=[common_ptrs(i)%sz/(storage_size(pp(1))/8)]) + call c_f_pointer(common_ptrs(i)%ptr, pp, shape=[common_ptrs(i)%sz/c_sizeof(pp(1))]) !!call acc_create_async(pp, common_ptrs(i)%sz, async=stream_act) call acc_create(pp, int(common_ptrs(i)%sz)) enddo @@ -295,7 +295,7 @@ subroutine ext_acc_copyin(ptrs, stream) num_ranges = get_common_pointers(ptrs, common_ptrs) do i = 1, num_ranges - call c_f_pointer(common_ptrs(i)%ptr, pp, shape=[common_ptrs(i)%sz/(storage_size(pp(1))/8)]) + call c_f_pointer(common_ptrs(i)%ptr, pp, shape=[common_ptrs(i)%sz/c_sizeof(pp(1))]) !!call acc_copyin_async(pp, common_ptrs(i)%sz, async=stream_act) call acc_copyin(pp, int(common_ptrs(i)%sz)) enddo @@ -322,7 +322,7 @@ subroutine ext_acc_copyout(ptrs, stream) num_ranges = get_common_pointers(ptrs, common_ptrs) do i = 1, num_ranges - call c_f_pointer(common_ptrs(i)%ptr, pp, shape=[common_ptrs(i)%sz/(storage_size(pp(1))/8)]) + call c_f_pointer(common_ptrs(i)%ptr, pp, shape=[common_ptrs(i)%sz/c_sizeof(pp(1))]) !!call acc_copyout_async(pp, common_ptrs(i)%sz, async=stream_act) call acc_copyout(pp, int(common_ptrs(i)%sz)) enddo @@ -349,7 +349,7 @@ subroutine ext_acc_delete(ptrs, stream) num_ranges = get_common_pointers(ptrs, common_ptrs) do i = 1, num_ranges - call c_f_pointer(common_ptrs(i)%ptr, pp, shape=[common_ptrs(i)%sz/(storage_size(pp(1))/8)]) + call c_f_pointer(common_ptrs(i)%ptr, pp, shape=[common_ptrs(i)%sz/c_sizeof(pp(1))]) !!call acc_delete_async(pp, common_ptrs(i)%sz, async=stream_act) call acc_delete(pp, int(common_ptrs(i)%sz)) enddo diff --git a/src/trans/gpu/internal/ftdir_mod.F90 b/src/trans/gpu/internal/ftdir_mod.F90 index a95d801e..b182a7e3 100755 --- a/src/trans/gpu/internal/ftdir_mod.F90 +++ b/src/trans/gpu/internal/ftdir_mod.F90 @@ -25,7 +25,7 @@ FUNCTION PREPARE_FTDIR(ALLOCATOR,KF_FS) RESULT(HFTDIR) USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT USE TPM_DISTR, ONLY: D USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF IMPLICIT NONE @@ -36,7 +36,7 @@ FUNCTION PREPARE_FTDIR(ALLOCATOR,KF_FS) RESULT(HFTDIR) REAL(KIND=JPRBT) :: DUMMY #ifndef IN_PLACE_FFT - HFTDIR%HREEL_COMPLEX = RESERVE(ALLOCATOR, INT(KF_FS*D%NLENGTF*STORAGE_SIZE(DUMMY)/8, KIND=C_SIZE_T)) + HFTDIR%HREEL_COMPLEX = RESERVE(ALLOCATOR, INT(KF_FS*D%NLENGTF,KIND=C_SIZE_T)*C_SIZEOF(DUMMY)) #endif END FUNCTION PREPARE_FTDIR @@ -82,7 +82,7 @@ SUBROUTINE FTDIR(ALLOCATOR,HFTDIR,PREEL_REAL,PREEL_COMPLEX,KFIELD) USE TPM_HICFFT, ONLY: EXECUTE_DIR_FFT USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF IMPLICIT NONE @@ -98,7 +98,7 @@ SUBROUTINE FTDIR(ALLOCATOR,HFTDIR,PREEL_REAL,PREEL_COMPLEX,KFIELD) PREEL_COMPLEX => PREEL_REAL #else CALL ASSIGN_PTR(PREEL_COMPLEX, GET_ALLOCATION(ALLOCATOR, HFTDIR%HREEL_COMPLEX),& - & 1_C_SIZE_T, INT(KFIELD*D%NLENGTF*STORAGE_SIZE(PREEL_COMPLEX(1))/8,KIND=C_SIZE_T)) + & 1_C_SIZE_T, INT(KFIELD*D%NLENGTF,KIND=C_SIZE_T)*C_SIZEOF(PREEL_COMPLEX(1))) #endif #ifdef ACCGPU diff --git a/src/trans/gpu/internal/ftinv_mod.F90 b/src/trans/gpu/internal/ftinv_mod.F90 index 7580dae9..b6bb0a11 100755 --- a/src/trans/gpu/internal/ftinv_mod.F90 +++ b/src/trans/gpu/internal/ftinv_mod.F90 @@ -24,7 +24,7 @@ FUNCTION PREPARE_FTINV(ALLOCATOR,KF_FS) RESULT(HFTINV) USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT USE TPM_DISTR, ONLY: D USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF IMPLICIT NONE @@ -35,7 +35,7 @@ FUNCTION PREPARE_FTINV(ALLOCATOR,KF_FS) RESULT(HFTINV) REAL(KIND=JPRBT) :: DUMMY #ifndef IN_PLACE_FFT - HFTINV%HREEL_REAL = RESERVE(ALLOCATOR, INT(D%NLENGTF*KF_FS*STORAGE_SIZE(DUMMY)/8,KIND=C_SIZE_T)) + HFTINV%HREEL_REAL = RESERVE(ALLOCATOR, INT(D%NLENGTF*KF_FS,KIND=C_SIZE_T)*C_SIZEOF(DUMMY)) #endif END FUNCTION @@ -80,7 +80,7 @@ SUBROUTINE FTINV(ALLOCATOR,HFTINV,PREEL_COMPLEX,PREEL_REAL,KFIELD) USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX USE BUFFERED_ALLOCATOR_MOD, ONLY: ASSIGN_PTR, GET_ALLOCATION - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF IMPLICIT NONE @@ -96,7 +96,7 @@ SUBROUTINE FTINV(ALLOCATOR,HFTINV,PREEL_COMPLEX,PREEL_REAL,KFIELD) PREEL_REAL => PREEL_COMPLEX #else CALL ASSIGN_PTR(PREEL_REAL, GET_ALLOCATION(ALLOCATOR, HFTINV%HREEL_REAL),& - & 1_C_SIZE_T, INT(KFIELD*D%NLENGTF*STORAGE_SIZE(PREEL_REAL(1))/8,KIND=C_SIZE_T)) + & 1_C_SIZE_T, INT(KFIELD*D%NLENGTF,KIND=C_SIZE_T)*C_SIZEOF(PREEL_REAL(1))) #endif #ifdef OMPGPU diff --git a/src/trans/gpu/internal/ltdir_mod.F90 b/src/trans/gpu/internal/ltdir_mod.F90 index 6bef8655..293bf673 100755 --- a/src/trans/gpu/internal/ltdir_mod.F90 +++ b/src/trans/gpu/internal/ltdir_mod.F90 @@ -26,7 +26,7 @@ MODULE LTDIR_MOD FUNCTION PREPARE_LTDIR(ALLOCATOR, KF_FS, KF_UV) RESULT(HLTDIR) USE TPM_DISTR, ONLY: D USE TPM_DIM, ONLY: R - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF USE LEDIR_MOD, ONLY: LEDIR_STRIDES USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE @@ -47,13 +47,13 @@ FUNCTION PREPARE_LTDIR(ALLOCATOR, KF_FS, KF_UV) RESULT(HLTDIR) IOUT0_STRIDES0=IOUT0_STRIDES0,IOUT0_SIZE=IOUT0_SIZE) ! POA1 - IALLOC_SZ = ALIGN(2*KF_FS*(R%NTMAX+3)*D%NUMP*STORAGE_SIZE(ZPRBT_DUMMY)/8,128) + IALLOC_SZ = ALIGN(INT(2*KF_FS*(R%NTMAX+3)*D%NUMP,KIND=C_SIZE_T)*C_SIZEOF(ZPRBT_DUMMY),128) ! POA2 - IALLOC_SZ = IALLOC_SZ + ALIGN(4*KF_UV*(R%NTMAX+3)*D%NUMP*STORAGE_SIZE(ZPRBT_DUMMY)/8,128) + IALLOC_SZ = IALLOC_SZ + ALIGN(INT(4*KF_UV*(R%NTMAX+3)*D%NUMP,KIND=C_SIZE_T)*C_SIZEOF(ZPRBT_DUMMY),128) ! ZOUT - IALLOC_SZ = IALLOC_SZ + ALIGN(IOUT_SIZE*STORAGE_SIZE(ZPRBT_DUMMY)/8,128) + IALLOC_SZ = IALLOC_SZ + ALIGN(INT(IOUT_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZPRBT_DUMMY),128) ! ZOUT0 - IALLOC_SZ = IALLOC_SZ+ ALIGN(IOUT0_SIZE*STORAGE_SIZE(ZPRD_DUMMY)/8,128) + IALLOC_SZ = IALLOC_SZ+ ALIGN(INT(IOUT0_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZPRD_DUMMY),128) HLTDIR%HOUT_AND_POA = RESERVE(ALLOCATOR, IALLOC_SZ) END FUNCTION PREPARE_LTDIR @@ -77,7 +77,7 @@ SUBROUTINE LTDIR(ALLOCATOR,HLTDIR,ZINPS,ZINPA,ZINPS0,ZINPA0,KF_FS,KF_UV,KF_SCALA USE TPM_TRANS, ONLY: NF_SC2, NF_SC3A, NF_SC3B USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION - USE ISO_C_BINDING, ONLY: C_SIZE_T, C_F_POINTER, C_LOC + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_F_POINTER, C_LOC, C_SIZEOF !**** *LTDIR* - Control of Direct Legendre transform step @@ -186,26 +186,26 @@ SUBROUTINE LTDIR(ALLOCATOR,HLTDIR,ZINPS,ZINPA,ZINPS0,ZINPA0,KF_FS,KF_UV,KF_SCALA IALLOC_POS = 1 - IALLOC_SZ = ALIGN(2*KF_FS*(R%NTMAX+3)*D%NUMP*STORAGE_SIZE(POA1_L(1))/8,128) + IALLOC_SZ = ALIGN(INT(2*KF_FS*(R%NTMAX+3)*D%NUMP,KIND=C_SIZE_T)*C_SIZEOF(POA1_L(1)),128) CALL ASSIGN_PTR(POA1_L, GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA),& & IALLOC_POS, IALLOC_SZ, SET_STREAM=1) CALL C_F_POINTER(C_LOC(POA1_L), POA1, (/ 2*KF_FS, R%NTMAX+3, D%NUMP /)) IALLOC_POS = IALLOC_POS + IALLOC_SZ - IALLOC_SZ = ALIGN(4*KF_UV*(R%NTMAX+3)*D%NUMP*STORAGE_SIZE(POA2_L(1))/8,128) + IALLOC_SZ = ALIGN(INT(4*KF_UV*(R%NTMAX+3)*D%NUMP,KIND=C_SIZE_T)*C_SIZEOF(POA2_L(1)),128) CALL ASSIGN_PTR(POA2_L, GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA),& & IALLOC_POS, IALLOC_SZ, SET_STREAM=1) CALL C_F_POINTER(C_LOC(POA2_L), POA2, (/ 4*KF_UV, R%NTMAX+3, D%NUMP /)) IALLOC_POS = IALLOC_POS + IALLOC_SZ ! ZOUT - IALLOC_SZ = ALIGN(IOUT_SIZE*STORAGE_SIZE(ZOUT(1))/8,128) + IALLOC_SZ = ALIGN(INT(IOUT_SIZE,C_SIZE_T)*C_SIZEOF(ZOUT(1)),128) CALL ASSIGN_PTR(ZOUT, GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA),& & IALLOC_POS, IALLOC_SZ, SET_STREAM=1) IALLOC_POS = IALLOC_POS + IALLOC_SZ ! ZOUT0 - IALLOC_SZ = ALIGN(IOUT0_SIZE*STORAGE_SIZE(ZOUT0(1))/8,128) + IALLOC_SZ = ALIGN(INT(IOUT0_SIZE,C_SIZE_T)*C_SIZEOF(ZOUT0(1)),128) CALL ASSIGN_PTR(ZOUT0, GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA),& & IALLOC_POS, IALLOC_SZ, SET_STREAM=1) IALLOC_POS = IALLOC_POS + IALLOC_SZ diff --git a/src/trans/gpu/internal/ltinv_mod.F90 b/src/trans/gpu/internal/ltinv_mod.F90 index 29fd7f62..11ed079a 100755 --- a/src/trans/gpu/internal/ltinv_mod.F90 +++ b/src/trans/gpu/internal/ltinv_mod.F90 @@ -28,7 +28,7 @@ FUNCTION PREPARE_LTINV(ALLOCATOR,KF_UV,KF_SCALARS,LVORGP,LDIVGP,LSCDERS) RESULT( USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRD USE TPM_DISTR, ONLY: D USE TPM_DIM, ONLY: R - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF USE LEINV_MOD, ONLY: LEINV_STRIDES USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE @@ -63,7 +63,7 @@ FUNCTION PREPARE_LTINV(ALLOCATOR,KF_UV,KF_SCALARS,LVORGP,LDIVGP,LSCDERS) RESULT( IF (LSCDERS) & IF_READIN = IF_READIN + KF_SCALARS ! Scalars NS Derivatives - IPIA_SZ = ALIGN(2*IF_READIN*(R%NSMAX+3)*D%NUMP*STORAGE_SIZE(ZPRBT_DUMMY)/8,128) + IPIA_SZ = ALIGN(INT(2*IF_READIN*(R%NSMAX+3)*D%NUMP,KIND=C_SIZE_T)*C_SIZEOF(ZPRBT_DUMMY),128) ! In Legendre space, we then ignore vorticity/divergence, if ! they don't need to be transformed. @@ -77,21 +77,21 @@ FUNCTION PREPARE_LTINV(ALLOCATOR,KF_UV,KF_SCALARS,LVORGP,LDIVGP,LSCDERS) RESULT( ! PIA IALLOC_SZ = IPIA_SZ ! ZINP - IALLOC_SZ = IALLOC_SZ + ALIGN(IIN_SIZE*STORAGE_SIZE(ZPRBT_DUMMY)/8,128) + IALLOC_SZ = IALLOC_SZ + ALIGN(INT(IIN_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZPRBT_DUMMY),128) ! ZINP0 - IALLOC_SZ = IALLOC_SZ + ALIGN(IIN0_SIZE*STORAGE_SIZE(ZPRD_DUMMY)/8,128) + IALLOC_SZ = IALLOC_SZ + ALIGN(INT(IIN0_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZPRD_DUMMY),128) HLTINV%HPIA_AND_IN = RESERVE(ALLOCATOR, IALLOC_SZ) IALLOC_SZ = 0 ! ZOUTA - IALLOC_SZ = IALLOC_SZ + ALIGN(IOUT_SIZE*STORAGE_SIZE(ZPRBT_DUMMY)/8,128) + IALLOC_SZ = IALLOC_SZ + ALIGN(INT(IOUT_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZPRBT_DUMMY),128) ! ZOUTS - IALLOC_SZ = IALLOC_SZ + ALIGN(IOUT_SIZE*STORAGE_SIZE(ZPRBT_DUMMY)/8,128) + IALLOC_SZ = IALLOC_SZ + ALIGN(INT(IOUT_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZPRBT_DUMMY),128) ! ZOUTA0 - IALLOC_SZ = IALLOC_SZ + ALIGN(IOUT0_SIZE*STORAGE_SIZE(ZPRD_DUMMY)/8,128) + IALLOC_SZ = IALLOC_SZ + ALIGN(INT(IOUT0_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZPRD_DUMMY),128) ! ZOUTS0 - IALLOC_SZ = IALLOC_SZ + ALIGN(IOUT0_SIZE*STORAGE_SIZE(ZPRD_DUMMY)/8,128) + IALLOC_SZ = IALLOC_SZ + ALIGN(INT(IOUT0_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZPRD_DUMMY),128) HLTINV%HOUTS_AND_OUTA = RESERVE(ALLOCATOR, IALLOC_SZ) @@ -118,7 +118,7 @@ SUBROUTINE LTINV(ALLOCATOR,HLTINV,KF_UV,KF_SCALARS,& USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM USE TPM_GEN, ONLY: LSYNC_TRANS USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX - USE ISO_C_BINDING, ONLY: C_SIZE_T, C_LOC + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_LOC, C_SIZEOF !**** *LTINV* - Inverse Legendre transform ! @@ -233,20 +233,20 @@ SUBROUTINE LTINV(ALLOCATOR,HLTINV,KF_UV,KF_SCALARS,& IALLOC_POS = 1 ! PIA - IALLOC_SZ = ALIGN(2*IF_READIN*(R%NTMAX+3)*D%NUMP*STORAGE_SIZE(PIA_L(1))/8,128) + IALLOC_SZ = ALIGN(INT(2*IF_READIN*(R%NTMAX+3)*D%NUMP,KIND=C_SIZE_T)*C_SIZEOF(PIA_L(1)),128) CALL ASSIGN_PTR(PIA_L, GET_ALLOCATION(ALLOCATOR, HLTINV%HPIA_AND_IN),& & IALLOC_POS, IALLOC_SZ) CALL C_F_POINTER(C_LOC(PIA_L), PIA, (/ 2*IF_READIN, R%NTMAX+3, D%NUMP /)) IALLOC_POS = IALLOC_POS + IALLOC_SZ ! ZINP - IALLOC_SZ = ALIGN(IIN_SIZE*STORAGE_SIZE(ZINP(1))/8,128) + IALLOC_SZ = ALIGN(INT(IIN_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZINP(1)),128) CALL ASSIGN_PTR(ZINP, GET_ALLOCATION(ALLOCATOR, HLTINV%HPIA_AND_IN),& & IALLOC_POS, IALLOC_SZ) IALLOC_POS = IALLOC_POS + IALLOC_SZ ! ZINP0 - IALLOC_SZ = ALIGN(IIN0_SIZE*STORAGE_SIZE(ZINP0(1))/8,128) + IALLOC_SZ = ALIGN(INT(IIN0_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZINP0(1)),128) CALL ASSIGN_PTR(ZINP0, GET_ALLOCATION(ALLOCATOR, HLTINV%HPIA_AND_IN),& & IALLOC_POS, IALLOC_SZ) IALLOC_POS = IALLOC_POS + IALLOC_SZ @@ -254,25 +254,25 @@ SUBROUTINE LTINV(ALLOCATOR,HLTINV,KF_UV,KF_SCALARS,& IALLOC_POS = 1 ! ZOUTA - IALLOC_SZ = ALIGN(IOUT_SIZE*STORAGE_SIZE(ZOUTA(1))/8,128) + IALLOC_SZ = ALIGN(INT(IOUT_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZOUTA(1)),128) CALL ASSIGN_PTR(ZOUTA, GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA),& & IALLOC_POS, IALLOC_SZ) IALLOC_POS = IALLOC_POS + IALLOC_SZ ! ZOUTS - IALLOC_SZ = ALIGN(IOUT_SIZE*STORAGE_SIZE(ZOUTS(1))/8,128) + IALLOC_SZ = ALIGN(INT(IOUT_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZOUTS(1)),128) CALL ASSIGN_PTR(ZOUTS, GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA),& & IALLOC_POS, IALLOC_SZ) IALLOC_POS = IALLOC_POS + IALLOC_SZ ! ZOUTA0 - IALLOC_SZ = ALIGN(IOUT0_SIZE*STORAGE_SIZE(ZOUTA0(1))/8,128) + IALLOC_SZ = ALIGN(INT(IOUT0_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZOUTA0(1)),128) CALL ASSIGN_PTR(ZOUTA0, GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA),& & IALLOC_POS, IALLOC_SZ) IALLOC_POS = IALLOC_POS + IALLOC_SZ ! ZOUTS0 - IALLOC_SZ = ALIGN(IOUT0_SIZE*STORAGE_SIZE(ZOUTS0(1))/8,128) + IALLOC_SZ = ALIGN(INT(IOUT0_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZOUTS0(1)),128) CALL ASSIGN_PTR(ZOUTS0, GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA),& & IALLOC_POS, IALLOC_SZ) IALLOC_POS = IALLOC_POS + IALLOC_SZ diff --git a/src/trans/gpu/internal/trgtol_mod.F90 b/src/trans/gpu/internal/trgtol_mod.F90 index 7e62ce1b..35ec233a 100755 --- a/src/trans/gpu/internal/trgtol_mod.F90 +++ b/src/trans/gpu/internal/trgtol_mod.F90 @@ -25,7 +25,7 @@ FUNCTION PREPARE_TRGTOL(ALLOCATOR,KF_GP,KF_FS) RESULT(HTRGTOL) USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT USE TPM_DISTR, ONLY: D USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF IMPLICIT NONE @@ -37,10 +37,10 @@ FUNCTION PREPARE_TRGTOL(ALLOCATOR,KF_GP,KF_FS) RESULT(HTRGTOL) INTEGER(KIND=C_SIZE_T) :: NELEM - HTRGTOL%HCOMBUFS = RESERVE(ALLOCATOR, int(KF_GP*D%NGPTOT*STORAGE_SIZE(DUMMY)/8,kind=c_size_t)) + HTRGTOL%HCOMBUFS = RESERVE(ALLOCATOR, INT(KF_GP*D%NGPTOT,KIND=C_SIZE_T)*C_SIZEOF(DUMMY)) - NELEM = KF_FS*D%NLENGTF*STORAGE_SIZE(DUMMY)/8 ! ZCOMBUFR - NELEM = NELEM + KF_FS*D%NLENGTF*STORAGE_SIZE(DUMMY)/8 ! PREEL_REAL + NELEM = INT(KF_FS*D%NLENGTF,KIND=C_SIZE_T)*C_SIZEOF(DUMMY) ! ZCOMBUFR + NELEM = NELEM + INT(KF_FS*D%NLENGTF,KIND=C_SIZE_T)*C_SIZEOF(DUMMY) ! PREEL_REAL HTRGTOL%HCOMBUFR_AND_REEL = RESERVE(ALLOCATOR, NELEM) END FUNCTION PREPARE_TRGTOL @@ -118,7 +118,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, #endif USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX USE TPM_TRANS, ONLY: NPROMA - USE ISO_C_BINDING, ONLY: C_SIZE_T, C_FLOAT, C_DOUBLE, C_INT8_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_FLOAT, C_DOUBLE, C_INT8_T, C_SIZEOF USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION USE OPENACC_EXT, ONLY: EXT_ACC_ARR_DESC, EXT_ACC_PASS, EXT_ACC_CREATE, & & EXT_ACC_DELETE @@ -324,8 +324,8 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, block CALL ASSIGN_PTR(PREEL_REAL, GET_ALLOCATION(ALLOCATOR, HTRGTOL%HCOMBUFR_AND_REEL),& - & int(KF_FS*D%NLENGTF*STORAGE_SIZE(PREEL_REAL(1))/8+1,kind=c_size_t), & - & int(KF_FS*D%NLENGTF*STORAGE_SIZE(PREEL_REAL(1))/8,kind=c_size_t)) + & INT(KF_FS*D%NLENGTF,KIND=C_SIZE_T)*C_SIZEOF(PREEL_REAL(1))+1_C_SIZE_T, & + & INT(KF_FS*D%NLENGTF,KIND=C_SIZE_T)*C_SIZEOF(PREEL_REAL(1))) !!CALL ASSIGN_PTR(PREEL_REAL, GET_ALLOCATION(ALLOCATOR, HTRGTOL%HCOMBUFR_AND_REEL), size1, size2) end block @@ -456,7 +456,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, IF (ISEND_COUNTS > 0) THEN CALL ASSIGN_PTR(ZCOMBUFS, GET_ALLOCATION(ALLOCATOR, HTRGTOL%HCOMBUFS),& - & 1_C_SIZE_T, int(ICOMBUFS_OFFSET(ISEND_COUNTS+1)*STORAGE_SIZE(ZCOMBUFS(1))/8,kind=c_size_t)) + & 1_C_SIZE_T, INT(ICOMBUFS_OFFSET(ISEND_COUNTS+1),KIND=C_SIZE_T)*C_SIZEOF(ZCOMBUFS(1))) ENDIF !....Pack loop......................................................... @@ -565,7 +565,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, CALL GSTATS(411,0) IF (IRECV_COUNTS > 0) THEN CALL ASSIGN_PTR(ZCOMBUFR, GET_ALLOCATION(ALLOCATOR, HTRGTOL%HCOMBUFR_AND_REEL),& - & 1_C_SIZE_T, int(ICOMBUFR_OFFSET(IRECV_COUNTS+1)*STORAGE_SIZE(ZCOMBUFR(1))/8,kind=c_size_t)) + & 1_C_SIZE_T, INT(ICOMBUFR_OFFSET(IRECV_COUNTS+1),KIND=C_SIZE_T)*C_SIZEOF(ZCOMBUFR(1))) ENDIF #ifdef OMPGPU #endif diff --git a/src/trans/gpu/internal/trltog_mod.F90 b/src/trans/gpu/internal/trltog_mod.F90 index db5ae67e..a4aa52f1 100755 --- a/src/trans/gpu/internal/trltog_mod.F90 +++ b/src/trans/gpu/internal/trltog_mod.F90 @@ -25,7 +25,7 @@ FUNCTION PREPARE_TRLTOG(ALLOCATOR,KF_FS,KF_GP) RESULT(HTRLTOG) USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT USE TPM_DISTR, ONLY: D USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF IMPLICIT NONE @@ -37,8 +37,8 @@ FUNCTION PREPARE_TRLTOG(ALLOCATOR,KF_FS,KF_GP) RESULT(HTRLTOG) INTEGER(KIND=C_SIZE_T) :: NELEM - NELEM = ALIGN(KF_GP*D%NGPTOT*STORAGE_SIZE(DUMMY)/8,128) ! ZCOMBUFR - NELEM = ALIGN(NELEM + KF_FS*D%NLENGTF*STORAGE_SIZE(DUMMY)/8,128) !ZCOMBUFS upper obund + NELEM = ALIGN(INT(KF_GP*D%NGPTOT,KIND=C_SIZE_T)*C_SIZEOF(DUMMY),128) ! ZCOMBUFR + NELEM = ALIGN(NELEM + INT(KF_FS*D%NLENGTF,KIND=C_SIZE_T)*C_SIZEOF(DUMMY),128) !ZCOMBUFS upper bound HTRLTOG%HCOMBUFR_AND_COMBUFS = RESERVE(ALLOCATOR, NELEM) END FUNCTION PREPARE_TRLTOG @@ -120,7 +120,7 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX USE TPM_TRANS, ONLY: LDIVGP, LSCDERS, LUVDER, LVORGP, NPROMA USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF USE OPENACC_EXT, ONLY: EXT_ACC_ARR_DESC, EXT_ACC_PASS, EXT_ACC_CREATE, & & EXT_ACC_DELETE USE OPENACC, ONLY: ACC_HANDLE_KIND @@ -644,12 +644,12 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, IF (IRECV_COUNTS > 0) THEN CALL ASSIGN_PTR(ZCOMBUFR, GET_ALLOCATION(ALLOCATOR, HTRLTOG%HCOMBUFR_AND_COMBUFS),& - & 1_C_SIZE_T, int(ICOMBUFR_OFFSET(IRECV_COUNTS+1)*STORAGE_SIZE(ZCOMBUFR(1))/8,kind=c_size_t)) + & 1_C_SIZE_T, INT(ICOMBUFR_OFFSET(IRECV_COUNTS+1),KIND=C_SIZE_T)*C_SIZEOF(ZCOMBUFR(1))) ENDIF IF (ISEND_COUNTS > 0) THEN CALL ASSIGN_PTR(ZCOMBUFS, GET_ALLOCATION(ALLOCATOR, HTRLTOG%HCOMBUFR_AND_COMBUFS),& - & int(ALIGN(KF_GP*D%NGPTOT*STORAGE_SIZE(ZCOMBUFR(1))/8,128)+1,kind=c_size_t), & - & int(ICOMBUFS_OFFSET(ISEND_COUNTS+1)*STORAGE_SIZE(ZCOMBUFS(1))/8,kind=c_size_t)) + & ALIGN(INT(KF_GP*D%NGPTOT,KIND=C_SIZE_T)*C_SIZEOF(ZCOMBUFR(1)),128)+1_C_SIZE_T, & + & INT(ICOMBUFS_OFFSET(ISEND_COUNTS+1),KIND=C_SIZE_T)*C_SIZEOF(ZCOMBUFS(1))) ENDIF #ifdef OMPGPU diff --git a/src/trans/gpu/internal/trltom_mod.F90 b/src/trans/gpu/internal/trltom_mod.F90 index 7d5e2dec..1f62c066 100755 --- a/src/trans/gpu/internal/trltom_mod.F90 +++ b/src/trans/gpu/internal/trltom_mod.F90 @@ -24,7 +24,7 @@ FUNCTION PREPARE_TRLTOM(ALLOCATOR, KF_FS) RESULT(HTRLTOM) USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT USE TPM_DISTR, ONLY: D USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF IMPLICIT NONE @@ -34,7 +34,7 @@ FUNCTION PREPARE_TRLTOM(ALLOCATOR, KF_FS) RESULT(HTRLTOM) REAL(KIND=JPRBT) :: DUMMY - HTRLTOM%HPFBUF = RESERVE(ALLOCATOR, int(D%NLENGT1B*2*KF_FS*STORAGE_SIZE(DUMMY)/8,kind=c_size_t)) + HTRLTOM%HPFBUF = RESERVE(ALLOCATOR, INT(D%NLENGT1B*2*KF_FS,KIND=C_SIZE_T)*C_SIZEOF(DUMMY)) END FUNCTION SUBROUTINE TRLTOM(ALLOCATOR,HTRLTOM,PFBUF_IN,PFBUF,KF_FS) @@ -98,7 +98,7 @@ SUBROUTINE TRLTOM(ALLOCATOR,HTRLTOM,PFBUF_IN,PFBUF,KF_FS) #endif USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS IMPLICIT NONE @@ -131,7 +131,7 @@ SUBROUTINE TRLTOM(ALLOCATOR,HTRLTOM,PFBUF_IN,PFBUF,KF_FS) IF (LHOOK) CALL DR_HOOK('TRLTOM',0,ZHOOK_HANDLE) CALL ASSIGN_PTR(PFBUF, GET_ALLOCATION(ALLOCATOR, HTRLTOM%HPFBUF),& - & 1_C_SIZE_T, int(D%NLENGT1B*2*KF_FS*STORAGE_SIZE(PFBUF(1))/8,kind=c_size_t)) + & 1_C_SIZE_T, INT(D%NLENGT1B*2*KF_FS,KIND=C_SIZE_T)*C_SIZEOF(PFBUF(1))) #ifdef OMPGPU #endif diff --git a/src/trans/gpu/internal/trltom_pack_unpack.F90 b/src/trans/gpu/internal/trltom_pack_unpack.F90 index 376fb91d..d77da0f8 100755 --- a/src/trans/gpu/internal/trltom_pack_unpack.F90 +++ b/src/trans/gpu/internal/trltom_pack_unpack.F90 @@ -32,7 +32,7 @@ MODULE TRLTOM_PACK_UNPACK FUNCTION PREPARE_TRLTOM_PACK(ALLOCATOR, KF_FS) RESULT(HTRLTOM_PACK) USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT USE TPM_DISTR, ONLY: D - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE IMPLICIT NONE @@ -43,7 +43,7 @@ FUNCTION PREPARE_TRLTOM_PACK(ALLOCATOR, KF_FS) RESULT(HTRLTOM_PACK) REAL(KIND=JPRBT) :: DUMMY - HTRLTOM_PACK%HFOUBUF_IN = RESERVE(ALLOCATOR, int(D%NLENGT0B*KF_FS*2*STORAGE_SIZE(DUMMY)/8,kind=c_size_t)) + HTRLTOM_PACK%HFOUBUF_IN = RESERVE(ALLOCATOR, INT(D%NLENGT0B*KF_FS*2,KIND=C_SIZE_T)*C_SIZEOF(DUMMY)) END FUNCTION PREPARE_TRLTOM_PACK SUBROUTINE TRLTOM_PACK(ALLOCATOR,HTRLTOM_PACK,PREEL_COMPLEX,FOUBUF_IN,KF_FS) @@ -74,7 +74,7 @@ SUBROUTINE TRLTOM_PACK(ALLOCATOR,HTRLTOM_PACK,PREEL_COMPLEX,FOUBUF_IN,KF_FS) USE TPM_DISTR, ONLY: D, MYSETW, D_NSTAGTF, D_NPNTGTB0, D_NPTRLS, D_NDGL_FS USE TPM_GEOMETRY, ONLY: G_NMEN, G_NLOEN USE TPM_DIM, ONLY: R_NSMAX - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF ! IMPLICIT NONE @@ -90,7 +90,7 @@ SUBROUTINE TRLTOM_PACK(ALLOCATOR,HTRLTOM_PACK,PREEL_COMPLEX,FOUBUF_IN,KF_FS) REAL(KIND=JPRBT) :: SCAL CALL ASSIGN_PTR(FOUBUF_IN, GET_ALLOCATION(ALLOCATOR, HTRLTOM_PACK%HFOUBUF_IN),& - & 1_C_SIZE_T, int(D%NLENGT0B*KF_FS*2*STORAGE_SIZE(FOUBUF_IN(1))/8,kind=c_size_t)) + & 1_C_SIZE_T, INT(D%NLENGT0B*KF_FS*2,KIND=C_SIZE_T)*C_SIZEOF(FOUBUF_IN(1))) #ifdef OMPGPU #endif @@ -137,7 +137,7 @@ FUNCTION PREPARE_TRLTOM_UNPACK(ALLOCATOR, KF_FS) RESULT(HTRLTOM_UNPACK) USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRD USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE USE LEDIR_MOD, ONLY: LEDIR_STRIDES - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF IMPLICIT NONE @@ -156,10 +156,10 @@ FUNCTION PREPARE_TRLTOM_UNPACK(ALLOCATOR, KF_FS) RESULT(HTRLTOM_UNPACK) IIN0_STRIDES0=IIN0_STRIDES0,IIN0_SIZE=IIN0_SIZE) ! Check if the reuse buffer is large enough - ISIZE = ALIGN(IIN_SIZE*STORAGE_SIZE(ZPRBT_DUMMY)/8,128) - ISIZE = ISIZE + ALIGN(IIN_SIZE*STORAGE_SIZE(ZPRBT_DUMMY)/8,128) - ISIZE = ISIZE + ALIGN(IIN0_SIZE*STORAGE_SIZE(ZPRD_DUMMY)/8,128) - ISIZE = ISIZE + ALIGN(IIN0_SIZE*STORAGE_SIZE(ZPRD_DUMMY)/8,128) + ISIZE = ALIGN(INT(IIN_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZPRBT_DUMMY),128) + ISIZE = ISIZE + ALIGN(INT(IIN_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZPRBT_DUMMY),128) + ISIZE = ISIZE + ALIGN(INT(IIN0_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZPRD_DUMMY),128) + ISIZE = ISIZE + ALIGN(INT(IIN0_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZPRD_DUMMY),128) HTRLTOM_UNPACK%HINPS_AND_ZINPA = RESERVE(ALLOCATOR, ISIZE) END FUNCTION PREPARE_TRLTOM_UNPACK @@ -172,7 +172,7 @@ SUBROUTINE TRLTOM_UNPACK(ALLOCATOR,HTRLTOM_UNPACK,FOUBUF,ZINPS,ZINPA,ZINPS0,ZINP USE TPM_FIELDS_FLAT, ONLY: F_RW, F_RACTHE USE TPM_DISTR, ONLY: D_NUMP, D_MYMS, D_NPNTGTB1, D_OFFSETS_GEMM1 USE LEDIR_MOD, ONLY: LEDIR_STRIDES - USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_SIZE_T + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF IMPLICIT NONE @@ -200,22 +200,22 @@ SUBROUTINE TRLTOM_UNPACK(ALLOCATOR,HTRLTOM_UNPACK,FOUBUF,ZINPS,ZINPA,ZINPS0,ZINP IALLOC_POS=1 - IALLOC_SZ = ALIGN(IIN_SIZE*STORAGE_SIZE(ZINPS(0))/8,128) + IALLOC_SZ = ALIGN(INT(IIN_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZINPS(0)),128) CALL ASSIGN_PTR(ZINPS, GET_ALLOCATION(ALLOCATOR, HTRLTOM_UNPACK%HINPS_AND_ZINPA),& & IALLOC_POS, IALLOC_SZ) IALLOC_POS=IALLOC_POS+IALLOC_SZ - IALLOC_SZ = ALIGN(IIN_SIZE*STORAGE_SIZE(ZINPA(0))/8,128) + IALLOC_SZ = ALIGN(INT(IIN_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZINPA(0)),128) CALL ASSIGN_PTR(ZINPA, GET_ALLOCATION(ALLOCATOR, HTRLTOM_UNPACK%HINPS_AND_ZINPA),& & IALLOC_POS, IALLOC_SZ) IALLOC_POS=IALLOC_POS+IALLOC_SZ - IALLOC_SZ = ALIGN(IIN0_SIZE*STORAGE_SIZE(ZINPS0(0))/8,128) + IALLOC_SZ = ALIGN(INT(IIN0_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZINPS0(0)),128) CALL ASSIGN_PTR(ZINPS0, GET_ALLOCATION(ALLOCATOR, HTRLTOM_UNPACK%HINPS_AND_ZINPA),& & IALLOC_POS, IALLOC_SZ) IALLOC_POS=IALLOC_POS+IALLOC_SZ - IALLOC_SZ = ALIGN(IIN0_SIZE*STORAGE_SIZE(ZINPA0(0))/8,128) + IALLOC_SZ = ALIGN(INT(IIN0_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZINPA0(0)),128) CALL ASSIGN_PTR(ZINPA0, GET_ALLOCATION(ALLOCATOR, HTRLTOM_UNPACK%HINPS_AND_ZINPA),& & IALLOC_POS, IALLOC_SZ) IALLOC_POS=IALLOC_POS+IALLOC_SZ diff --git a/src/trans/gpu/internal/trmtol_mod.F90 b/src/trans/gpu/internal/trmtol_mod.F90 index 291ed644..d4860ddf 100755 --- a/src/trans/gpu/internal/trmtol_mod.F90 +++ b/src/trans/gpu/internal/trmtol_mod.F90 @@ -24,7 +24,7 @@ FUNCTION PREPARE_TRMTOL(ALLOCATOR, KF_LEG) RESULT(HTRMTOL) USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT USE TPM_DISTR, ONLY: D USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF IMPLICIT NONE @@ -34,7 +34,7 @@ FUNCTION PREPARE_TRMTOL(ALLOCATOR, KF_LEG) RESULT(HTRMTOL) REAL(KIND=JPRBT) :: DUMMY - HTRMTOL%HPFBUF = RESERVE(ALLOCATOR, int(D%NLENGT0B*2*KF_LEG*STORAGE_SIZE(DUMMY)/8,kind=c_size_t)) + HTRMTOL%HPFBUF = RESERVE(ALLOCATOR, INT(D%NLENGT0B*2*KF_LEG,KIND=C_SIZE_T)*C_SIZEOF(DUMMY)) END FUNCTION SUBROUTINE TRMTOL(ALLOCATOR,HTRMTOL,PFBUF_IN,PFBUF,KF_LEG) @@ -98,7 +98,7 @@ SUBROUTINE TRMTOL(ALLOCATOR,HTRMTOL,PFBUF_IN,PFBUF,KF_LEG) #endif USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS IMPLICIT NONE @@ -132,7 +132,7 @@ SUBROUTINE TRMTOL(ALLOCATOR,HTRMTOL,PFBUF_IN,PFBUF,KF_LEG) IF (LHOOK) CALL DR_HOOK('TRMTOL',0,ZHOOK_HANDLE) CALL ASSIGN_PTR(PFBUF, GET_ALLOCATION(ALLOCATOR, HTRMTOL%HPFBUF),& - & 1_C_SIZE_T, int(D%NLENGT0B*2*KF_LEG*STORAGE_SIZE(PFBUF(1))/8,kind=c_size_t)) + & 1_C_SIZE_T, INT(D%NLENGT0B*2*KF_LEG,KIND=C_SIZE_T)*C_SIZEOF(PFBUF(1))) IF(NPROC > 1) THEN DO J=1,NPRTRW diff --git a/src/trans/gpu/internal/trmtol_pack_unpack.F90 b/src/trans/gpu/internal/trmtol_pack_unpack.F90 index d8648c58..4468af60 100755 --- a/src/trans/gpu/internal/trmtol_pack_unpack.F90 +++ b/src/trans/gpu/internal/trmtol_pack_unpack.F90 @@ -28,7 +28,7 @@ MODULE TRMTOL_PACK_UNPACK FUNCTION PREPARE_TRMTOL_PACK(ALLOCATOR,KF_LEG) RESULT(HTRMTOL_PACK) USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT USE TPM_DISTR, ONLY: D - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE IMPLICIT NONE @@ -42,7 +42,7 @@ FUNCTION PREPARE_TRMTOL_PACK(ALLOCATOR,KF_LEG) RESULT(HTRMTOL_PACK) REAL(KIND=JPRBT) :: ZPRBT_DUMMY - IALLOC_SZ = D%NLENGT1B*2*KF_LEG*STORAGE_SIZE(ZPRBT_DUMMY)/8 + IALLOC_SZ = INT(D%NLENGT1B*2*KF_LEG,KIND=C_SIZE_T)*C_SIZEOF(ZPRBT_DUMMY) HTRMTOL_PACK%HFOUBUF_IN = RESERVE(ALLOCATOR, int(IALLOC_SZ,kind=c_size_t)) END FUNCTION SUBROUTINE TRMTOL_PACK(ALLOCATOR,HTRMTOL_PACK,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,FOUBUF_IN,KF_LEG) @@ -91,7 +91,7 @@ SUBROUTINE TRMTOL_PACK(ALLOCATOR,HTRMTOL_PACK,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,FOUBUF_I USE TPM_DISTR, ONLY: D, D_NUMP, D_MYMS, D_NPNTGTB1, D_OFFSETS_GEMM1 USE LEINV_MOD, ONLY: LEINV_STRIDES USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF IMPLICIT NONE @@ -116,7 +116,7 @@ SUBROUTINE TRMTOL_PACK(ALLOCATOR,HTRMTOL_PACK,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,FOUBUF_I IF (LHOOK) CALL DR_HOOK('TRMTOL_PACK',0,ZHOOK_HANDLE) CALL ASSIGN_PTR(FOUBUF_IN, GET_ALLOCATION(ALLOCATOR, HTRMTOL_PACK%HFOUBUF_IN),& - & 1_C_SIZE_T, int(D%NLENGT1B*2*KF_LEG*STORAGE_SIZE(FOUBUF_IN(1))/8,kind=c_size_t)) + & 1_C_SIZE_T, INT(D%NLENGT1B*2*KF_LEG,KIND=C_SIZE_T)*C_SIZEOF(FOUBUF_IN(1))) CALL LEINV_STRIDES(KF_LEG,IOUT_STRIDES0=IOUT_STRIDES0,IOUT_SIZE=IOUT_SIZE,& IOUT0_STRIDES0=IOUT0_STRIDES0,IOUT0_SIZE=IOUT0_SIZE) @@ -180,7 +180,7 @@ FUNCTION PREPARE_TRMTOL_UNPACK(ALLOCATOR,KF_FS) RESULT(HTRMTOL_UNPACK) USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT USE TPM_DISTR, ONLY: D USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF IMPLICIT NONE @@ -191,7 +191,7 @@ FUNCTION PREPARE_TRMTOL_UNPACK(ALLOCATOR,KF_FS) RESULT(HTRMTOL_UNPACK) REAL(KIND=JPRBT) :: DUMMY - HTRMTOL_UNPACK%HREEL = RESERVE(ALLOCATOR, int(D%NLENGTF*KF_FS*STORAGE_SIZE(DUMMY)/8,kind=c_size_t)) + HTRMTOL_UNPACK%HREEL = RESERVE(ALLOCATOR, INT(D%NLENGTF*KF_FS,KIND=C_SIZE_T)*C_SIZEOF(DUMMY)) END FUNCTION PREPARE_TRMTOL_UNPACK SUBROUTINE TRMTOL_UNPACK(ALLOCATOR,HTRMTOL_UNPACK,FOUBUF,PREEL_COMPLEX,KF_CURRENT,KF_TOTAL) @@ -227,7 +227,7 @@ SUBROUTINE TRMTOL_UNPACK(ALLOCATOR,HTRMTOL_UNPACK,FOUBUF,PREEL_COMPLEX,KF_CURREN USE TPM_DISTR, ONLY: D, MYSETW, D_NSTAGTF, D_NPNTGTB0, D_NPTRLS, D_NDGL_FS USE TPM_GEOMETRY, ONLY: G_NMEN, G_NLOEN, G_NLOEN_MAX USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION -USE ISO_C_BINDING, ONLY: C_SIZE_T +USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF ! IMPLICIT NONE @@ -242,7 +242,7 @@ SUBROUTINE TRMTOL_UNPACK(ALLOCATOR,HTRMTOL_UNPACK,FOUBUF,PREEL_COMPLEX,KF_CURREN REAL(KIND=JPRBT) :: RET_REAL, RET_COMPLEX CALL ASSIGN_PTR(PREEL_COMPLEX, GET_ALLOCATION(ALLOCATOR, HTRMTOL_UNPACK%HREEL),& - & 1_C_SIZE_T, int(KF_TOTAL*D%NLENGTF*STORAGE_SIZE(PREEL_COMPLEX(1))/8,kind=c_size_t)) + & 1_C_SIZE_T, INT(KF_TOTAL*D%NLENGTF,KIND=C_SIZE_T)*C_SIZEOF(PREEL_COMPLEX(1))) #ifdef OMPGPU #endif