From 27713625ef94bb7917a0fb934c341ede52122c89 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Tue, 21 May 2024 15:16:13 +0000 Subject: [PATCH 01/17] Move data transfers up to WAMODEL --- src/ecwam/wamintgr_loki_gpu.F90 | 36 +-------------------------------- src/ecwam/wamodel.F90 | 36 +++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 35 deletions(-) diff --git a/src/ecwam/wamintgr_loki_gpu.F90 b/src/ecwam/wamintgr_loki_gpu.F90 index ba2659be..8aace0ff 100644 --- a/src/ecwam/wamintgr_loki_gpu.F90 +++ b/src/ecwam/wamintgr_loki_gpu.F90 @@ -40,7 +40,7 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & USE YOWPARAM , ONLY : NANG, NFRE USE YOWPCONS , ONLY : EPSMIN USE YOWSTAT , ONLY : CDTPRO, IDELPRO, IDELT, IDELWI, LLSOURCE, TIME_PROPAG, TIME_PHYS, & - & TIME_OFFLOAD, LUPDATE_GPU_GLOBALS + & LUPDATE_GPU_GLOBALS USE YOWWIND , ONLY : CDAWIFL, CDATEWO, CDATEFL USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK @@ -91,31 +91,10 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & !* PROPAGATION TIME ! ---------------- -TIME0=-WAM_USER_CLOCK() - IF(LUPDATE_GPU_GLOBALS)THEN !$loki update_device ENDIF -CALL VARS_4D%SYNC_DEVICE_RDWR() -CALL BLK2GLO%SYNC_DEVICE_RDONLY() -CALL FF_NOW%SYNC_DEVICE_RDWR(AIRD=.TRUE., WDWAVE=.TRUE., CICOVER=.TRUE., WSWAVE=.TRUE., & -& WSTAR=.TRUE., UFRIC=.TRUE., TAUW=.TRUE., TAUWDIR=.TRUE., Z0M=.TRUE., Z0B=.TRUE., & -& CHRNCK=.TRUE., CITHICK=.TRUE., USTRA=.TRUE., VSTRA=.TRUE.) -CALL FF_NEXT%SYNC_DEVICE_RDONLY(AIRD=.TRUE., WDWAVE=.TRUE., CICOVER=.TRUE., WSWAVE=.TRUE., & -& WSTAR=.TRUE., UFRIC=.TRUE., TAUW=.TRUE., TAUWDIR=.TRUE., Z0M=.TRUE., Z0B=.TRUE., & -& CHRNCK=.TRUE., CITHICK=.TRUE., USTRA=.TRUE., VSTRA=.TRUE.) -CALL WVPRPT%SYNC_DEVICE_RDWR() -CALL WVENVI%SYNC_DEVICE_RDWR(DEPTH=.TRUE., DELLAM1=.TRUE., COSPHM1=.TRUE., UCUR=.TRUE., VCUR=.TRUE., & -& EMAXDPT=.TRUE., IOBND=.TRUE., IODP=.TRUE.) -CALL WAM2NEMO%SYNC_DEVICE_RDWR(NEMOUSTOKES=.TRUE., NEMOVSTOKES=.TRUE., NEMOSTRN=.TRUE., & -& NPHIEPS=.TRUE., NTAUOC=.TRUE., NSWH=.TRUE., NMWP=.TRUE., NEMOTAUX=.TRUE., & -& NEMOTAUY=.TRUE., NEMOWSWAVE=.TRUE., NEMOPHIF=.TRUE.) -CALL INTFLDS%SYNC_DEVICE_RDWR(WSEMEAN=.TRUE., WSFMEAN=.TRUE., USTOKES=.TRUE., & -& VSTOKES=.TRUE., STRNMS=.TRUE., TAUXD=.TRUE., TAUYD=.TRUE., TAUOCXD=.TRUE., & -& TAUOCYD=.TRUE., TAUOC=.TRUE., PHIOCD=.TRUE., PHIEPS=.TRUE., PHIAW=.TRUE.) -CALL MIJ%SYNC_DEVICE_RDWR() -TIME_OFFLOAD = TIME_OFFLOAD + (TIME0+WAM_USER_CLOCK())*1.E-06 !$acc data present(VARS_4D, WVPRPT, WVENVI, BLK2GLO) IF (CDATE == CDTPRA) THEN @@ -211,19 +190,6 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & LUPDATE_GPU_GLOBALS = .FALSE. -TIME0=-WAM_USER_CLOCK() -CALL WVPRPT%SYNC_HOST_RDWR() -CALL WVENVI%SYNC_HOST_RDWR() -CALL FF_NOW%SYNC_HOST_RDWR() -CALL FF_NEXT%SYNC_HOST_RDWR() -CALL WAM2NEMO%SYNC_HOST_RDWR() -CALL INTFLDS%SYNC_HOST_RDWR() -CALL VARS_4D%SYNC_HOST_RDWR() -CALL MIJ%SYNC_HOST_RDWR() -CALL BLK2GLO%SYNC_HOST_RDWR() - -TIME_OFFLOAD = TIME_OFFLOAD + (TIME0+WAM_USER_CLOCK())*1.E-06 - IF (LHOOK) CALL DR_HOOK('WAMINTGR',1,ZHOOK_HANDLE) END SUBROUTINE WAMINTGR_LOKI_GPU diff --git a/src/ecwam/wamodel.F90 b/src/ecwam/wamodel.F90 index 5985c02a..84769b0a 100644 --- a/src/ecwam/wamodel.F90 +++ b/src/ecwam/wamodel.F90 @@ -86,6 +86,7 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & USE MPL_MODULE, ONLY : MPL_BARRIER USE WAM_MULTIO_MOD, ONLY : WAM_MULTIO_FLUSH USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK + USE YOWABORT , ONLY : WAM_ABORT ! ---------------------------------------------------------------------- @@ -203,6 +204,24 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & !* 1.1 FIX END DATE OF THIS PROPAGATION STEP AND OUTPUT TIMES. ! ------------------------------------------------------- + CALL VARS_4D%SYNC_DEVICE_RDWR() + CALL BLK2GLO%SYNC_DEVICE_RDONLY() + CALL FF_NOW%SYNC_DEVICE_RDWR(AIRD=.TRUE., WDWAVE=.TRUE., CICOVER=.TRUE., WSWAVE=.TRUE., & + & WSTAR=.TRUE., UFRIC=.TRUE., TAUW=.TRUE., TAUWDIR=.TRUE., Z0M=.TRUE., Z0B=.TRUE., & + & CHRNCK=.TRUE., CITHICK=.TRUE., USTRA=.TRUE., VSTRA=.TRUE.) + CALL FF_NEXT%SYNC_DEVICE_RDONLY(AIRD=.TRUE., WDWAVE=.TRUE., CICOVER=.TRUE., WSWAVE=.TRUE., & + & WSTAR=.TRUE., UFRIC=.TRUE., TAUW=.TRUE., TAUWDIR=.TRUE., Z0M=.TRUE., Z0B=.TRUE., & + & CHRNCK=.TRUE., CITHICK=.TRUE., USTRA=.TRUE., VSTRA=.TRUE.) + CALL WVPRPT%SYNC_DEVICE_RDWR() + CALL WVENVI%SYNC_DEVICE_RDWR(DEPTH=.TRUE., DELLAM1=.TRUE., COSPHM1=.TRUE., UCUR=.TRUE., VCUR=.TRUE., & + & EMAXDPT=.TRUE., IOBND=.TRUE., IODP=.TRUE.) + CALL WAM2NEMO%SYNC_DEVICE_RDWR(NEMOUSTOKES=.TRUE., NEMOVSTOKES=.TRUE., NEMOSTRN=.TRUE., & + & NPHIEPS=.TRUE., NTAUOC=.TRUE., NSWH=.TRUE., NMWP=.TRUE., NEMOTAUX=.TRUE., & + & NEMOTAUY=.TRUE., NEMOWSWAVE=.TRUE., NEMOPHIF=.TRUE.) + CALL INTFLDS%SYNC_DEVICE_RDWR(WSEMEAN=.TRUE., WSFMEAN=.TRUE., USTOKES=.TRUE., & + & VSTOKES=.TRUE., STRNMS=.TRUE., TAUXD=.TRUE., TAUYD=.TRUE., TAUOCXD=.TRUE., & + & TAUOCYD=.TRUE., TAUOC=.TRUE., PHIOCD=.TRUE., PHIEPS=.TRUE., PHIAW=.TRUE.) + CALL MIJ%SYNC_DEVICE_RDWR() CDTPRA = CDTPRO CALL INCDATE(CDTPRO, IDELPRO) @@ -288,6 +307,11 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & !NEST (not used at ECMWF) !* 1.4.1 INPUT OF BOUNDARY VALUES. ! ------------------------- +#ifdef _OPENACC + IF(IBOUNF == 1)THEN + CALL WAM_ABORT("WAMODEL: IBOUNF==1 NOT SUPPORTED FOR GPU OFFLOAD") + ENDIF +#endif IF (IBOUNF == 1) CALL BOUINPT (IU02, VARS_4D%FL1, NBLKS, NBLKE) !* 1.4.2 OUTPUT OF BOUNDARY POINTS. ! -------------------------- @@ -299,9 +323,21 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & ! ---------------------------------------- IF ( NGOUT > 0 .AND. (CDTINTT == CDTPRO .OR. LRST) ) THEN ! OUTPUT POINT SPECTRA (not usually used at ECMWF) + CALL VARS_4D%F_FL1%SYNC_HOST_RDONLY() + CALL FF_NOW%SYNC_HOST_RDONLY() + CALL OUTWPSP (VARS_4D%FL1, FF_NOW) ENDIF + CALL WVPRPT%SYNC_HOST_RDWR() + CALL WVENVI%SYNC_HOST_RDWR() + CALL FF_NOW%SYNC_HOST_RDWR() + CALL FF_NEXT%SYNC_HOST_RDWR() + CALL WAM2NEMO%SYNC_HOST_RDWR() + CALL INTFLDS%SYNC_HOST_RDWR() + CALL VARS_4D%SYNC_HOST_RDWR() + CALL MIJ%SYNC_HOST_RDWR() + CALL BLK2GLO%SYNC_HOST_RDWR() ! 1.6 COMPUTE OUTPUT PARAMETERS FIELDS AND PRINT OUT NORMS ! ---------------------------------------------------- From fcf8392220a9b6ee21f8aa2d02ba04c006407130 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Wed, 22 May 2024 13:14:09 +0000 Subject: [PATCH 02/17] Offload OUTBS computation to GPU --- src/ecwam/CMakeLists.txt | 1 + src/ecwam/aki.F90 | 1 + src/ecwam/cal_second_order_spec.F90 | 26 +- src/ecwam/ctcor.F90 | 10 +- src/ecwam/dominant_period.F90 | 8 +- src/ecwam/drvtype_mod.fypp | 1 + src/ecwam/ecwam_loki_gpu.config | 33 +- src/ecwam/fndprt.F90 | 24 +- src/ecwam/h_max.F90 | 6 +- src/ecwam/intpol.F90 | 16 +- src/ecwam/kurtosis.F90 | 24 +- src/ecwam/meansqs.F90 | 18 +- src/ecwam/mwp1.F90 | 10 +- src/ecwam/mwp2.F90 | 10 +- src/ecwam/outbeta.F90 | 24 +- src/ecwam/outblock.F90 | 468 ++++++++++++---------------- src/ecwam/outbs_loki_gpu.F90 | 131 ++++++++ src/ecwam/outsetwmask.F90 | 6 +- src/ecwam/outspec.F90 | 2 +- src/ecwam/parmean.F90 | 6 +- src/ecwam/scosfl.F90 | 8 +- src/ecwam/se10mean.F90 | 6 +- src/ecwam/sebtmean.F90 | 6 +- src/ecwam/secspom.F90 | 19 +- src/ecwam/sep3tr.F90 | 49 ++- src/ecwam/sepwisw.F90 | 28 +- src/ecwam/stat_nl.F90 | 6 +- src/ecwam/sthq.F90 | 6 +- src/ecwam/transf_bfi.F90 | 1 + src/ecwam/transf_r.F90 | 1 + src/ecwam/w_maxh.F90 | 88 +++--- src/ecwam/w_mode_st.F90 | 1 + src/ecwam/wamintgr_loki_gpu.F90 | 2 - src/ecwam/wamodel.F90 | 38 ++- src/ecwam/wdirspread.F90 | 19 +- src/ecwam/weflux.F90 | 10 +- src/ecwam/yowpcons.F90 | 1 + 37 files changed, 614 insertions(+), 500 deletions(-) create mode 100644 src/ecwam/outbs_loki_gpu.F90 diff --git a/src/ecwam/CMakeLists.txt b/src/ecwam/CMakeLists.txt index 8db84753..7a62ef68 100644 --- a/src/ecwam/CMakeLists.txt +++ b/src/ecwam/CMakeLists.txt @@ -363,6 +363,7 @@ if(HAVE_LOKI AND NOT LOKI_MODE MATCHES "idem|idem-stack") list(APPEND ecwam_srcs wamintgr_loki_gpu.F90) list(REMOVE_ITEM ecwam_srcs wamintgr.F90) list(APPEND ecwam_srcs cireduce_loki_gpu.F90) + list(APPEND ecwam_srcs outbs_loki_gpu.F90) endif() # expand derived-types using src/ecwam/yowdrvtype_config.yml diff --git a/src/ecwam/aki.F90 b/src/ecwam/aki.F90 index f84058bb..b75214dc 100644 --- a/src/ecwam/aki.F90 +++ b/src/ecwam/aki.F90 @@ -52,6 +52,7 @@ REAL(KIND=JWRB) FUNCTION AKI(OM,BETA) ! ---------------------------------------------------------------------- IMPLICIT NONE +!$loki routine seq REAL(KIND=JWRB), INTENT(IN) :: OM, BETA diff --git a/src/ecwam/cal_second_order_spec.F90 b/src/ecwam/cal_second_order_spec.F90 index d6fc02e1..6e18c324 100644 --- a/src/ecwam/cal_second_order_spec.F90 +++ b/src/ecwam/cal_second_order_spec.F90 @@ -52,8 +52,8 @@ SUBROUTINE CAL_SECOND_ORDER_SPEC(KIJS, KIJL, F1, WAVNUM, DEPTH, SIG) USE YOWPARAM, ONLY : NANG, NFRE USE YOWPCONS, ONLY : G, PI, ZPI USE YOWSHAL , ONLY : NDEPTH, DEPTHA, DEPTHD - USE YOWTABL , ONLY : MR, XMR, MA, XMA, NFREH, NANGH, NMAX, & - & OMEGA, DFDTH, THH, DELTHH, IM_P, IM_M, & + USE YOWTABL , ONLY : MR, XMR, MA, XMA, NFREH, NANGH, & + & OMEGA, DFDTH, THH, DELTHH, IM_P, IM_M, & & TA, TB, TC_QL, TT_4M, TT_4P USE YOWTEST , ONLY : IU06 @@ -66,9 +66,9 @@ SUBROUTINE CAL_SECOND_ORDER_SPEC(KIJS, KIJL, F1, WAVNUM, DEPTH, SIG) #include "secspom.intfb.h" INTEGER(KIND=JWIM), INTENT(IN) :: KIJS, KIJL - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG,NFRE), INTENT(INOUT) :: F1 - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NFRE), INTENT(IN) :: WAVNUM - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(IN) :: DEPTH + REAL(KIND=JWRB), DIMENSION(KIJL,NANG,NFRE), INTENT(INOUT) :: F1 + REAL(KIND=JWRB), DIMENSION(KIJL,NFRE), INTENT(IN) :: WAVNUM + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(IN) :: DEPTH REAL(KIND=JWRB), INTENT(IN) :: SIG INTEGER(KIND=JWIM) :: IJ,M,K,K0,M0,MP,KP,MM,KM,KL,KLL,ML @@ -76,10 +76,10 @@ SUBROUTINE CAL_SECOND_ORDER_SPEC(KIJS, KIJL, F1, WAVNUM, DEPTH, SIG) REAL(KIND=JWRB) :: FRAC,CO1,DEL,DELF,D1,D2,D3,D4,C1 REAL(KIND=JWRB) :: C2,XM,XK,OMSTART,AREA,SUM,SUM1,SUM3,GAM_B_J,ZFAC REAL(KIND=JPHOOK) :: ZHOOK_HANDLE - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: EMEAN, FMEAN - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: F1MEAN, AKMEAN, XKMEAN, EMAXL - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG,NFRE) :: F3 - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANGH,NFREH) :: PF1, PF3 + REAL(KIND=JWRB), DIMENSION(KIJL) :: EMEAN, FMEAN + REAL(KIND=JWRB), DIMENSION(KIJL) :: F1MEAN, AKMEAN, XKMEAN, EMAXL + REAL(KIND=JWRB), DIMENSION(KIJL,NANG,NFRE) :: F3 + REAL(KIND=JWRB), DIMENSION(KIJL,NANGH,NFREH) :: PF1, PF3 !----------------------------------------------------------------------- @@ -104,8 +104,8 @@ SUBROUTINE CAL_SECOND_ORDER_SPEC(KIJS, KIJL, F1, WAVNUM, DEPTH, SIG) !*** 1.11 NO INTERPOLATION. ! ---------------------- - CALL SECSPOM(F1,F3,KIJS,KIJL,NFRE,NANG,NMAX,NDEPTH,DEPTHA, & - & DEPTHD,OMSTART,FRAC,MR,DFDTH,OMEGA,DEPTH, & + CALL SECSPOM(F1,F3,KIJS,KIJL,NFRE,NANG,NDEPTH,DEPTHA, & + & DEPTHD,OMSTART,FRAC,MR,DFDTH,OMEGA,DEPTH, & & AKMEAN,TA,TB,TC_QL,TT_4M,TT_4P,IM_P,IM_M) DO M=1,NFRE DO K=1,NANG @@ -138,8 +138,8 @@ SUBROUTINE CAL_SECOND_ORDER_SPEC(KIJS, KIJL, F1, WAVNUM, DEPTH, SIG) !*** 1.13 DETERMINE SECOND-ORDER SPEC ! -------------------------------- - CALL SECSPOM(PF1,PF3,KIJS,KIJL,NFREH,NANGH,NMAX,NDEPTH,DEPTHA, & - & DEPTHD,OMSTART,FRAC,MR,DFDTH,OMEGA,DEPTH, & + CALL SECSPOM(PF1,PF3,KIJS,KIJL,NFREH,NANGH,NDEPTH,DEPTHA, & + & DEPTHD,OMSTART,FRAC,MR,DFDTH,OMEGA,DEPTH, & & AKMEAN,TA,TB,TC_QL,TT_4M,TT_4P,IM_P,IM_M) !*** 2.24 INTERPOLATE TOWARDS HIGH-RES GRID diff --git a/src/ecwam/ctcor.F90 b/src/ecwam/ctcor.F90 index c840ee7f..69c144ab 100644 --- a/src/ecwam/ctcor.F90 +++ b/src/ecwam/ctcor.F90 @@ -49,16 +49,16 @@ SUBROUTINE CTCOR (KIJS, KIJL, F, CTR) IMPLICIT NONE INTEGER(KIND=JWIM), INTENT(IN) :: KIJS, KIJL - REAL(KIND=JWRB), INTENT(IN) :: F(KIJS:KIJL,NANG,NFRE) - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(OUT) :: CTR + REAL(KIND=JWRB), INTENT(IN) :: F(KIJL,NANG,NFRE) + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(OUT) :: CTR INTEGER(KIND=JWIM) :: IJ, K, M REAL(KIND=JWRB) :: FR1M1, ZARG, ZAMP REAL(KIND=JPHOOK) :: ZHOOK_HANDLE - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: EM, ZT1 - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: ZRHO, ZLAM - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL, NFRE) :: TEMP + REAL(KIND=JWRB), DIMENSION(KIJL) :: EM, ZT1 + REAL(KIND=JWRB), DIMENSION(KIJL) :: ZRHO, ZLAM + REAL(KIND=JWRB), DIMENSION(KIJL, NFRE) :: TEMP ! ---------------------------------------------------------------------- diff --git a/src/ecwam/dominant_period.F90 b/src/ecwam/dominant_period.F90 index 6f25aa58..8f182847 100644 --- a/src/ecwam/dominant_period.F90 +++ b/src/ecwam/dominant_period.F90 @@ -57,14 +57,14 @@ SUBROUTINE DOMINANT_PERIOD (KIJS, KIJL, FL1, DP) INTEGER(KIND=JWIM), INTENT(IN) :: KIJS, KIJL - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG,NFRE), INTENT(IN) :: FL1 - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(OUT) :: DP + REAL(KIND=JWRB), DIMENSION(KIJL,NANG,NFRE), INTENT(IN) :: FL1 + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(OUT) :: DP REAL(KIND=JWRB), PARAMETER :: FLTHRS = 0.1_JWRB INTEGER(KIND=JWIM) :: IJ, K, M - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: TEMP, EM, FCROP - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NFRE) :: F1D4 + REAL(KIND=JWRB), DIMENSION(KIJL) :: TEMP, EM, FCROP + REAL(KIND=JWRB), DIMENSION(KIJL,NFRE) :: F1D4 REAL(KIND=JPHOOK) :: ZHOOK_HANDLE diff --git a/src/ecwam/drvtype_mod.fypp b/src/ecwam/drvtype_mod.fypp index 822f40f0..b61c45ef 100644 --- a/src/ecwam/drvtype_mod.fypp +++ b/src/ecwam/drvtype_mod.fypp @@ -90,6 +90,7 @@ MODULE ${obj.upper()}$_TYPE_MOD NULLIFY(SELF%${var.upper()}$) !$acc exit data detach(SELF%${var.upper()}$) CALL FIELD_DELETE(SELF%F_${var.upper()}$) + NULLIFY(SELF%F_${var.upper()}$) #:endfor #:endfor diff --git a/src/ecwam/ecwam_loki_gpu.config b/src/ecwam/ecwam_loki_gpu.config index b6b95a79..1666acc3 100644 --- a/src/ecwam/ecwam_loki_gpu.config +++ b/src/ecwam/ecwam_loki_gpu.config @@ -11,14 +11,16 @@ enable_imports = true disable = [ 'yomhook', 'abor1', 'abort1', 'gstats', 'yowgstats', 'wam_user_clock', 'parkind1', 'propag_wam', 'newwind', 'oml_mod', 'field_module', 'incdate', - 'ieee_arithmetic', # intrinsic modules (should have INTRINSIC in their USE statement) + 'ieee_arithmetic', 'ieee_exceptions', # intrinsic modules (should have INTRINSIC in their USE statement) + 'ieee_set_halting_mode', 'ieee_get_halting_mode', # intrinsic subroutines 'mfeb_length', 'cdm', # internal functions 'outwspec_io_serv_handler', 'outint_io_serv_handler', 'ifstowam_handler', # procedure pointers - '*%sync_host*', '*%sync_device*' + '*%sync_host*', '*%sync_device*', + 'df', 'f' # statement functions ] # modules to be parsed but not transformed -ignore = ['yowgrid', 'yowtest', 'yowshal'] +ignore = ['yowgrid', 'yowtest'] # Prune the tree for these to ensure they are not processed by transformations block = ['ec_parkind', 'parkind_wave', 'yowdrvtype'] @@ -46,6 +48,13 @@ block = ['ec_parkind', 'parkind_wave', 'yowdrvtype'] replicate = false real_kind = 'JWRB' +[routines.outbs_loki_gpu] + role = "driver" + expand = true + replicate = false + real_kind = 'JWRB' + block = ['outwnorm', 'yowdrvtype', 'parkind_wave'] + # add inline function calls here to force the plan to add them [routines.chnkmin] [routines.ns_gc] @@ -53,6 +62,15 @@ block = ['ec_parkind', 'parkind_wave', 'yowdrvtype'] [routines.transf_snl] [routines.transf] [routines.aki_ice] +[routines.aki] +[routines.w_mode_st] +[routines.transf_r] +[routines.transf_bfi] + +# we add loki inlined routines here rather than the ignore list because we want them to be sanitised +# before inlining +[routines.sebtmean] +[routines.scosfl] # Disable replication for modules containing global variables [routines.yowaltas] @@ -94,6 +112,15 @@ block = ['ec_parkind', 'parkind_wave', 'yowdrvtype'] [routines.yowwndg] expand = false replicate = false +[routines.yowshal] + expand = false + replicate = false +[routines.yowcurr] + expand = false + replicate = false +[routines.yowmap] + expand = false + replicate = false # Define indices and bounds for array dimensions [dimensions.horizontal] diff --git a/src/ecwam/fndprt.F90 b/src/ecwam/fndprt.F90 index 0367da55..83fe0f5c 100644 --- a/src/ecwam/fndprt.F90 +++ b/src/ecwam/fndprt.F90 @@ -83,34 +83,34 @@ SUBROUTINE FNDPRT (KIJS, KIJL, NPMAX, & #include "parmean.intfb.h" INTEGER(KIND=JWIM), INTENT(IN) :: KIJS, KIJL, NPMAX - INTEGER(KIND=JWIM), INTENT(IN), DIMENSION(KIJS:KIJL) :: MIJ - INTEGER(KIND=JWIM), INTENT(INOUT), DIMENSION(KIJS:KIJL) :: NPEAK - INTEGER(KIND=JWIM), INTENT(IN), DIMENSION(KIJS:KIJL,NPMAX) :: NTHP, NFRP + INTEGER(KIND=JWIM), INTENT(IN), DIMENSION(KIJL) :: MIJ + INTEGER(KIND=JWIM), INTENT(INOUT), DIMENSION(KIJL) :: NPEAK + INTEGER(KIND=JWIM), INTENT(IN), DIMENSION(KIJL,NPMAX) :: NTHP, NFRP - REAL(KIND=JWRB), INTENT(IN), DIMENSION(KIJS:KIJL) :: FLNOISE - REAL(KIND=JWRB), INTENT(IN), DIMENSION(KIJS:KIJL,NANG,NFRE) :: FLLOW, FL1 - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG,NFRE), INTENT(INOUT) :: SWM - REAL(KIND=JWRB), INTENT(OUT), DIMENSION(KIJS:KIJL,0:NPMAX) :: DIR, PER, ENE + REAL(KIND=JWRB), INTENT(IN), DIMENSION(KIJL) :: FLNOISE + REAL(KIND=JWRB), INTENT(IN), DIMENSION(KIJL,NANG,NFRE) :: FLLOW, FL1 + REAL(KIND=JWRB), DIMENSION(KIJL,NANG,NFRE), INTENT(INOUT) :: SWM + REAL(KIND=JWRB), INTENT(OUT), DIMENSION(KIJL,0:NPMAX) :: DIR, PER, ENE - LOGICAL, INTENT(IN), DIMENSION(KIJS:KIJL,NANG) :: LLCOSDIFF + LOGICAL, INTENT(IN), DIMENSION(KIJL,NANG) :: LLCOSDIFF INTEGER(KIND=JWIM) :: ITHC, IFRC INTEGER(KIND=JWIM) :: IJ, M, K, IP, NITT INTEGER(KIND=JWIM) :: NANGH, KK, KKMIN, KKMAX INTEGER(KIND=JWIM) :: IFRL, ITHL, ITHR - INTEGER(KIND=JWIM), DIMENSION(KIJS:KIJL) :: MMIN, MMAX + INTEGER(KIND=JWIM), DIMENSION(KIJL) :: MMIN, MMAX INTEGER(KIND=JWIM), DIMENSION(1-NANG:2*NANG) :: KLOC REAL(KIND=JWRB) :: HALF_SECTOR REAL(KIND=JPHOOK) :: ZHOOK_HANDLE REAL(KIND=JWRB), DIMENSION(NANG,NFRE) :: W2 - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG,NFRE) :: W1 - REAL(KIND=JWRB), DIMENSION(NANG,NFRE,NPMAX,KIJS:KIJL) :: SPEC + REAL(KIND=JWRB), DIMENSION(KIJL,NANG,NFRE) :: W1 + REAL(KIND=JWRB), DIMENSION(NANG,NFRE,NPMAX,KIJL) :: SPEC LOGICAL :: LLCHANGE, LLADD LOGICAL :: LLADDPART - LOGICAL, DIMENSION(KIJS:KIJL,NANG,NFRE) :: LLW3 + LOGICAL, DIMENSION(KIJL,NANG,NFRE) :: LLW3 ! ---------------------------------------------------------------------- diff --git a/src/ecwam/h_max.F90 b/src/ecwam/h_max.F90 index 72a3ece2..f93e4581 100644 --- a/src/ecwam/h_max.F90 +++ b/src/ecwam/h_max.F90 @@ -58,8 +58,8 @@ SUBROUTINE H_MAX(C3,C4,XNSLC,KIJS,KIJL,AA,BB,HMAXN,SIG_HM) IMPLICIT NONE INTEGER(KIND=JWIM), INTENT(IN) :: KIJS, KIJL - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(IN) :: C3, C4, XNSLC - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(OUT) :: AA, BB, HMAXN, SIG_HM + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(IN) :: C3, C4, XNSLC + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(OUT) :: AA, BB, HMAXN, SIG_HM INTEGER(KIND=JWIM), PARAMETER :: NITER = 5 @@ -79,7 +79,7 @@ SUBROUTINE H_MAX(C3,C4,XNSLC,KIJS,KIJL,AA,BB,HMAXN,SIG_HM) REAL(KIND=JPHOOK) :: ZHOOK_HANDLE REAL(KIND=JWRB) :: ZEPSILON REAL(KIND=JWRB) :: TWOG1, G2, AE, BE, F, Z0, EMIN, EMAX, EVAL - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL):: E, BBM1, DFNORMA + REAL(KIND=JWRB), DIMENSION(KIJL):: E, BBM1, DFNORMA !---------------------------------------------------------------------- diff --git a/src/ecwam/intpol.F90 b/src/ecwam/intpol.F90 index 3834b2d9..83b62836 100644 --- a/src/ecwam/intpol.F90 +++ b/src/ecwam/intpol.F90 @@ -69,25 +69,25 @@ SUBROUTINE INTPOL (KIJS, KIJL, FLR, FLA, WAVNUM, UCUR, VCUR, IRA) #include "abort1.intfb.h" INTEGER(KIND=JWIM), INTENT(IN) :: KIJS, KIJL - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG,NFRE), INTENT(IN) :: FLR - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG,NFRE), INTENT(OUT) :: FLA - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NFRE), INTENT(IN) :: WAVNUM - REAL(KIND=JWRB), DIMENSION (KIJS:KIJL), INTENT(IN) :: UCUR, VCUR + REAL(KIND=JWRB), DIMENSION(KIJL,NANG,NFRE), INTENT(IN) :: FLR + REAL(KIND=JWRB), DIMENSION(KIJL,NANG,NFRE), INTENT(OUT) :: FLA + REAL(KIND=JWRB), DIMENSION(KIJL,NFRE), INTENT(IN) :: WAVNUM + REAL(KIND=JWRB), DIMENSION (KIJL), INTENT(IN) :: UCUR, VCUR INTEGER(KIND=JWIM), INTENT(IN) :: IRA INTEGER(KIND=JWIM) :: IJ, M, K INTEGER(KIND=JWIM) :: NFRE_MAX, NEWM, NEWM1, KH - INTEGER(KIND=JWIM), DIMENSION(KIJS:KIJL) :: NEWF, NEWFLA, KNEW + INTEGER(KIND=JWIM), DIMENSION(KIJL) :: NEWF, NEWFLA, KNEW REAL(KIND=JWRB) :: FRE0, CDF, ZPI2GM, COEF, FMAX, FREQ, DFREQTH, FR5OFREQ5 REAL(KIND=JWRB) :: FNEW, GWH REAL(KIND=JPHOOK) :: ZHOOK_HANDLE REAL(KIND=JWRB), DIMENSION(NFRE) :: DFTH - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: OLDFL, WAVN - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: FNEF, GWP, GWM + REAL(KIND=JWRB), DIMENSION(KIJL) :: OLDFL, WAVN + REAL(KIND=JWRB), DIMENSION(KIJL) :: FNEF, GWP, GWM - LOGICAL, DIMENSION(KIJS:KIJL) :: LICE2SEA + LOGICAL, DIMENSION(KIJL) :: LICE2SEA ! ---------------------------------------------------------------------- IF (LHOOK) CALL DR_HOOK('INTPOL',0,ZHOOK_HANDLE) diff --git a/src/ecwam/kurtosis.F90 b/src/ecwam/kurtosis.F90 index 65a0283b..54236e84 100644 --- a/src/ecwam/kurtosis.F90 +++ b/src/ecwam/kurtosis.F90 @@ -203,11 +203,11 @@ SUBROUTINE KURTOSIS(KIJS, KIJL, FL1, & INTEGER(KIND=JWIM), INTENT(IN) :: KIJS, KIJL - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG,NFRE), INTENT(IN) :: FL1 + REAL(KIND=JWRB), DIMENSION(KIJL,NANG,NFRE), INTENT(IN) :: FL1 - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(IN) :: DEPTH - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(OUT) :: C3, C4, BF2, QP, HMAX, TMAX - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(OUT) :: ETA_M, R, XNSLC, SIG_TH, EPS, XNU + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(IN) :: DEPTH + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(OUT) :: C3, C4, BF2, QP, HMAX, TMAX + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(OUT) :: ETA_M, R, XNSLC, SIG_TH, EPS, XNU INTEGER(KIND=JWIM) :: IJ, M, K @@ -225,14 +225,14 @@ SUBROUTINE KURTOSIS(KIJS, KIJL, FL1, & REAL(KIND=JWRB) :: ZEPSILON, ZSQREPSILON, FRMAX, FRMIN REAL(KIND=JWRB), DIMENSION(NFRE) :: FAC4 - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: HMAXN - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: SUM0,SUM1,SUM2,SUM4,SUM40,SUM6 - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: XKP,SIG_OM - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: SIG_HM - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: F_M,OM_UP - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: AA,BB,C4_DYN,C4_B - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: FFMAX - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NFRE) :: FF + REAL(KIND=JWRB), DIMENSION(KIJL) :: HMAXN + REAL(KIND=JWRB), DIMENSION(KIJL) :: SUM0,SUM1,SUM2,SUM4,SUM40,SUM6 + REAL(KIND=JWRB), DIMENSION(KIJL) :: XKP,SIG_OM + REAL(KIND=JWRB), DIMENSION(KIJL) :: SIG_HM + REAL(KIND=JWRB), DIMENSION(KIJL) :: F_M,OM_UP + REAL(KIND=JWRB), DIMENSION(KIJL) :: AA,BB,C4_DYN,C4_B + REAL(KIND=JWRB), DIMENSION(KIJL) :: FFMAX + REAL(KIND=JWRB), DIMENSION(KIJL,NFRE) :: FF !----------------------------------------------------------------------- diff --git a/src/ecwam/meansqs.F90 b/src/ecwam/meansqs.F90 index 1489b981..92fca79d 100644 --- a/src/ecwam/meansqs.F90 +++ b/src/ecwam/meansqs.F90 @@ -66,11 +66,11 @@ SUBROUTINE MEANSQS(XKMSS, KIJS, KIJL, F, WAVNUM, USTAR, COSWDIF, XMSS) REAL(KIND=JWRB), INTENT(IN) :: XKMSS INTEGER(KIND=JWIM), INTENT(IN) :: KIJS, KIJL - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG,NFRE), INTENT(IN) :: F - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NFRE), INTENT(IN) :: WAVNUM - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(IN) :: USTAR - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG), INTENT(IN) :: COSWDIF - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(OUT) :: XMSS + REAL(KIND=JWRB), DIMENSION(KIJL,NANG,NFRE), INTENT(IN) :: F + REAL(KIND=JWRB), DIMENSION(KIJL,NFRE), INTENT(IN) :: WAVNUM + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(IN) :: USTAR + REAL(KIND=JWRB), DIMENSION(KIJL,NANG), INTENT(IN) :: COSWDIF + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(OUT) :: XMSS INTEGER(KIND=JWIM) :: IJ, NFRE_MSS, NFRE_EFF @@ -78,9 +78,9 @@ SUBROUTINE MEANSQS(XKMSS, KIJS, KIJL, F, WAVNUM, USTAR, COSWDIF, XMSS) REAL(KIND=JWRB) :: XLOGFS, FCUT REAL(KIND=JPHOOK) :: ZHOOK_HANDLE REAL(KIND=JWRB), DIMENSION(NFRE) :: FD - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: TEMP1, TEMP2 - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: XMSSLF, XMSS_TAIL - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: HALP, FRGC + REAL(KIND=JWRB), DIMENSION(KIJL) :: TEMP1, TEMP2 + REAL(KIND=JWRB), DIMENSION(KIJL) :: XMSSLF, XMSS_TAIL + REAL(KIND=JWRB), DIMENSION(KIJL) :: HALP, FRGC ! ---------------------------------------------------------------------- @@ -101,7 +101,7 @@ SUBROUTINE MEANSQS(XKMSS, KIJS, KIJL, F, WAVNUM, USTAR, COSWDIF, XMSS) NFRE_EFF = MIN(NFRE,NFRE_MSS) CALL MEANSQS_LF (NFRE_EFF, KIJS, KIJL, F, WAVNUM, XMSSLF) - XMSS(:) = XMSS(:) + XMSSLF(:) + XMSS(KIJS:KIJL) = XMSS(KIJS:KIJL) + XMSSLF(KIJS:KIJL) ! ADD TAIL CORRECTION TO MEAN SQUARE SLOPE (between FR(NFRE_EFF) and FRGC). diff --git a/src/ecwam/mwp1.F90 b/src/ecwam/mwp1.F90 index fd872daf..e75a70d9 100644 --- a/src/ecwam/mwp1.F90 +++ b/src/ecwam/mwp1.F90 @@ -61,14 +61,14 @@ SUBROUTINE MWP1 (KIJS, KIJL, F, MEANWP1) IMPLICIT NONE INTEGER(KIND=JWIM), INTENT(IN) :: KIJS, KIJL - REAL(KIND=JWRB), INTENT(IN) :: F(KIJS:KIJL,NANG,NFRE) - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(OUT) :: MEANWP1 + REAL(KIND=JWRB), INTENT(IN) :: F(KIJL,NANG,NFRE) + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(OUT) :: MEANWP1 INTEGER(KIND=JWIM) :: IJ, K, M REAL(KIND=JPHOOK) :: ZHOOK_HANDLE REAL(KIND=JWRB) :: DELT25, COEF_FR, FR1M1 - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: TEMP, EM + REAL(KIND=JWRB), DIMENSION(KIJL) :: TEMP, EM LOGICAL :: LL_HALT_INVALID @@ -78,8 +78,10 @@ SUBROUTINE MWP1 (KIJS, KIJL, F, MEANWP1) ! Turn off Floating-Point-Exceptions in this scope to avoid FPE_INVALID in optimized code ! with branch prediction. It is safe to do so as DIV_BY_ZERO is protected. +#ifndef WAM_GPU CALL IEEE_GET_HALTING_MODE(IEEE_INVALID, LL_HALT_INVALID) IF (LL_HALT_INVALID) CALL IEEE_SET_HALTING_MODE(IEEE_INVALID, .FALSE.) +#endif DO IJ=KIJS,KIJL EM(IJ) = 0.0_JWRB @@ -119,7 +121,9 @@ SUBROUTINE MWP1 (KIJS, KIJL, F, MEANWP1) ENDIF ENDDO +#ifndef WAM_GPU IF (LL_HALT_INVALID) CALL IEEE_SET_HALTING_MODE(IEEE_INVALID, .TRUE.) +#endif IF (LHOOK) CALL DR_HOOK('MWP1',1,ZHOOK_HANDLE) diff --git a/src/ecwam/mwp2.F90 b/src/ecwam/mwp2.F90 index c86efdda..b0ee1d9f 100644 --- a/src/ecwam/mwp2.F90 +++ b/src/ecwam/mwp2.F90 @@ -61,13 +61,13 @@ SUBROUTINE MWP2 (KIJS, KIJL, F, MEANWP2) IMPLICIT NONE INTEGER(KIND=JWIM), INTENT(IN) :: KIJS, KIJL - REAL(KIND=JWRB), INTENT(IN) :: F(KIJS:KIJL,NANG,NFRE) - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(OUT) :: MEANWP2 + REAL(KIND=JWRB), INTENT(IN) :: F(KIJL,NANG,NFRE) + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(OUT) :: MEANWP2 INTEGER(KIND=JWIM) :: IJ, K, M REAL(KIND=JPHOOK) :: ZHOOK_HANDLE REAL(KIND=JWRB) :: DELT25, COEF_FR, FR1M1 - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: TEMP, EM + REAL(KIND=JWRB), DIMENSION(KIJL) :: TEMP, EM LOGICAL :: LL_HALT_INVALID @@ -77,8 +77,10 @@ SUBROUTINE MWP2 (KIJS, KIJL, F, MEANWP2) ! Turn off Floating-Point-Exceptions in this scope to avoid FPE_INVALID in optimized code ! with branch prediction. It is safe to do so as DIV_BY_ZERO is protected. +#ifndef WAM_GPU CALL IEEE_GET_HALTING_MODE(IEEE_INVALID, LL_HALT_INVALID) IF (LL_HALT_INVALID) CALL IEEE_SET_HALTING_MODE(IEEE_INVALID, .FALSE.) +#endif DO IJ=KIJS,KIJL EM(IJ) = 0.0_JWRB @@ -118,7 +120,9 @@ SUBROUTINE MWP2 (KIJS, KIJL, F, MEANWP2) ENDIF ENDDO +#ifndef WAM_GPU IF (LL_HALT_INVALID) CALL IEEE_SET_HALTING_MODE(IEEE_INVALID, .TRUE.) +#endif IF (LHOOK) CALL DR_HOOK('MWP2',1,ZHOOK_HANDLE) diff --git a/src/ecwam/outbeta.F90 b/src/ecwam/outbeta.F90 index bf08de30..0e758312 100644 --- a/src/ecwam/outbeta.F90 +++ b/src/ecwam/outbeta.F90 @@ -77,14 +77,14 @@ SUBROUTINE OUTBETA (KIJS, KIJL, & IMPLICIT NONE INTEGER(KIND=JWIM), INTENT(IN) :: KIJS, KIJL - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(IN) :: U10 - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(IN) :: USTAR - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(IN) :: Z0M - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(IN) :: Z0B - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(IN) :: CHRNCK - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(OUT) :: BETAM - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(OUT) :: BETAHQ - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(OUT), OPTIONAL :: CD + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(IN) :: U10 + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(IN) :: USTAR + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(IN) :: Z0M + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(IN) :: Z0B + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(IN) :: CHRNCK + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(OUT) :: BETAM + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(OUT) :: BETAHQ + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(OUT), OPTIONAL :: CD REAL(KIND=JWRB), PARAMETER :: AMAX=0.02_JWRB @@ -98,8 +98,8 @@ SUBROUTINE OUTBETA (KIJS, KIJL, & INTEGER(KIND=JWIM) :: IJ REAL(KIND=JWRB) :: GUSM2, Z0ATM - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: USM - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: ALPHAMAXU10 + REAL(KIND=JWRB), DIMENSION(KIJL) :: USM + REAL(KIND=JWRB), DIMENSION(KIJL) :: ALPHAMAXU10 REAL(KIND=JPHOOK) :: ZHOOK_HANDLE ! ---------------------------------------------------------------------- @@ -111,9 +111,9 @@ SUBROUTINE OUTBETA (KIJS, KIJL, & ! ---------------------- IF (LLGCBZ0) THEN - ALPHAMAXU10(:)=ALPHAMAX + ALPHAMAXU10(KIJS:KIJL)=ALPHAMAX ELSE - ALPHAMAXU10(:)=MIN(ALPHAMAX,AMAX+BMAX*U10(:)) + ALPHAMAXU10(KIJS:KIJL)=MIN(ALPHAMAX,AMAX+BMAX*U10(KIJS:KIJL)) ENDIF diff --git a/src/ecwam/outblock.F90 b/src/ecwam/outblock.F90 index 305e11eb..73403901 100644 --- a/src/ecwam/outblock.F90 +++ b/src/ecwam/outblock.F90 @@ -94,32 +94,30 @@ SUBROUTINE OUTBLOCK (KIJS, KIJL, MIJ, & #include "wdirspread.intfb.h" #include "weflux.intfb.h" #include "w_maxh.intfb.h" -#include "halphap.intfb.h" -#include "alphap_tail.intfb.h" INTEGER(KIND=JWIM), INTENT(IN) :: KIJS, KIJL - INTEGER(KIND=JWIM), DIMENSION(KIJS:KIJL), INTENT(IN) :: MIJ - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG,NFRE), INTENT(IN) :: FL1, XLLWS - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL, NFRE), INTENT(IN) :: WAVNUM - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL, NFRE), INTENT(IN) :: CINV - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL, NFRE), INTENT(IN) :: CGROUP - - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(IN) :: DEPTH - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(IN) :: UCUR - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(IN) :: VCUR - INTEGER(KIND=JWIM), DIMENSION(KIJS:KIJL), INTENT(IN) :: IODP - - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(IN) :: AIRD, WDWAVE, CICOVER, WSWAVE, WSTAR - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(IN) :: UFRIC, TAUW, Z0M, Z0B, CHRNCK, CITHICK - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(IN) :: ALTWH, CALTWH, RALTCOR, USTOKES, VSTOKES, STRNMS - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(IN) :: TAUXD, TAUYD, TAUOCXD, TAUOCYD, TAUOC, PHIOCD - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(IN) :: PHIEPS, PHIAW - REAL(KIND=JWRO), DIMENSION(KIJS:KIJL), INTENT(IN) :: NEMOSST, NEMOCICOVER, NEMOCITHICK, NEMOUCUR, NEMOVCUR - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NIPRMOUT), INTENT(OUT) :: BOUT + INTEGER(KIND=JWIM), DIMENSION(KIJL), INTENT(IN) :: MIJ + REAL(KIND=JWRB), DIMENSION(KIJL,NANG,NFRE), INTENT(IN) :: FL1, XLLWS + REAL(KIND=JWRB), DIMENSION(KIJL, NFRE), INTENT(IN) :: WAVNUM + REAL(KIND=JWRB), DIMENSION(KIJL, NFRE), INTENT(IN) :: CINV + REAL(KIND=JWRB), DIMENSION(KIJL, NFRE), INTENT(IN) :: CGROUP + + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(IN) :: DEPTH + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(IN) :: UCUR + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(IN) :: VCUR + INTEGER(KIND=JWIM), DIMENSION(KIJL), INTENT(IN) :: IODP + + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(IN) :: AIRD, WDWAVE, CICOVER, WSWAVE, WSTAR + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(IN) :: UFRIC, TAUW, Z0M, Z0B, CHRNCK, CITHICK + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(IN) :: ALTWH, CALTWH, RALTCOR, USTOKES, VSTOKES, STRNMS + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(IN) :: TAUXD, TAUYD, TAUOCXD, TAUOCYD, TAUOC, PHIOCD + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(IN) :: PHIEPS, PHIAW + REAL(KIND=JWRO), DIMENSION(KIJL), INTENT(IN) :: NEMOSST, NEMOCICOVER, NEMOCITHICK, NEMOUCUR, NEMOVCUR + REAL(KIND=JWRB), DIMENSION(KIJL,NIPRMOUT), INTENT(OUT) :: BOUT INTEGER(KIND=JWIM), PARAMETER :: NTEWH=6 - INTEGER(KIND=JWIM) :: IJ, K, M, ITG, IR, ITR, IH + INTEGER(KIND=JWIM) :: IJ, K, M, ITG, ITR, IH INTEGER(KIND=JWIM) :: IRA REAL(KIND=JWRB) :: SIG @@ -127,22 +125,22 @@ SUBROUTINE OUTBLOCK (KIJS, KIJL, MIJ, & REAL(KIND=JWRB) :: XMODEL_CUTOFF REAL(KIND=JPHOOK) :: ZHOOK_HANDLE REAL(KIND=JWRB), DIMENSION(0:NTEWH) :: TEWH - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: EM, FM, DP - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: C3, C4, BF, QP, HMAX, TMAX - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: CMAX_F, HMAX_N, CMAX_ST, HMAX_ST, PHIST - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: ETA_M, R, XNSLC, SIG_TH, EPS, XNU - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: FLD1, FLD2 - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: ESWELL ,FSWELL ,THSWELL, P1SWELL, P2SWELL, SPRDSWELL - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: ESEA ,FSEA ,THWISEA, P1SEA , P2SEA , SPRDSEA - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: CHARNOCK, BETAHQ, CDATM - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: HALP - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NTRAIN) :: EMTRAIN - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NTRAIN) :: THTRAIN, PMTRAIN - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG) :: COSWDIF + REAL(KIND=JWRB), DIMENSION(KIJL) :: EM, FM, DP + REAL(KIND=JWRB), DIMENSION(KIJL) :: C3, C4, BF, QP, HMAX, TMAX + REAL(KIND=JWRB), DIMENSION(KIJL) :: CMAX_F, HMAX_N, CMAX_ST, HMAX_ST, PHIST + REAL(KIND=JWRB), DIMENSION(KIJL) :: ETA_M, R, XNSLC, SIG_TH, EPS, XNU + REAL(KIND=JWRB), DIMENSION(KIJL) :: FLD1, FLD2 + REAL(KIND=JWRB), DIMENSION(KIJL) :: ESWELL ,FSWELL ,THSWELL, P1SWELL, P2SWELL, SPRDSWELL + REAL(KIND=JWRB), DIMENSION(KIJL) :: ESEA ,FSEA ,THWISEA, P1SEA , P2SEA , SPRDSEA + REAL(KIND=JWRB), DIMENSION(KIJL) :: CHARNOCK, BETAHQ, CDATM + REAL(KIND=JWRB), DIMENSION(KIJL) :: HALP + REAL(KIND=JWRB), DIMENSION(KIJL,NTRAIN) :: EMTRAIN + REAL(KIND=JWRB), DIMENSION(KIJL,NTRAIN) :: THTRAIN, PMTRAIN + REAL(KIND=JWRB), DIMENSION(KIJL,NANG) :: COSWDIF ! *FL2ND* SPECTRUM with second order effect added if LSECONDORDER is true . ! and in the absolute frame of reference if currents are used - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG,NFRE) :: FL2ND + REAL(KIND=JWRB), DIMENSION(KIJL,NANG,NFRE) :: FL2ND LOGICAL :: LLPEAKF @@ -198,59 +196,51 @@ SUBROUTINE OUTBLOCK (KIJS, KIJL, MIJ, & ! LOAD THE OUTPUT BUFFER: - IR=0 - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN + IF (IPFGTBL(1) /= 0) THEN ! SIGNIFICANT WAVE HEIGHT CONVERSION - BOUT(KIJS:KIJL,ITOBOUT(IR))=4._JWRB*SQRT(MAX(EM(KIJS:KIJL),0._JWRB)) + BOUT(KIJS:KIJL,ITOBOUT(1))=4._JWRB*SQRT(MAX(EM(KIJS:KIJL),0._JWRB)) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - ITG=ITOBOUT(IR) + IF (IPFGTBL(2) /= 0) THEN + ITG=ITOBOUT(2) CALL STHQ (KIJS, KIJL, FL2ND, BOUT(KIJS,ITG)) ! CONVERT DIRECTIONS TO DEGREES AND METEOROLOGICAL CONVENTION BOUT(KIJS:KIJL,ITG)=MOD(DEG*BOUT(KIJS:KIJL,ITG)+180._JWRB,360._JWRB) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN + IF (IPFGTBL(3) /= 0) THEN ! CONVERSION TO PERIOD DO IJ=KIJS,KIJL IF (FM(IJ) > 0._JWRB) THEN - BOUT(IJ,ITOBOUT(IR))=1._JWRB/FM(IJ) + BOUT(IJ,ITOBOUT(3))=1._JWRB/FM(IJ) ELSE - BOUT(IJ,ITOBOUT(IR))=ZMISS + BOUT(IJ,ITOBOUT(3))=ZMISS ENDIF ENDDO ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=UFRIC(KIJS:KIJL) + IF (IPFGTBL(4) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(4))=UFRIC(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN + IF (IPFGTBL(5) /= 0) THEN ! CONVERT DIRECTIONS TO DEGREES AND METEOROLOGICAL CONVENTION - BOUT(KIJS:KIJL,ITOBOUT(IR))=MOD(DEG*WDWAVE(KIJS:KIJL)+180._JWRB,360._JWRB) + BOUT(KIJS:KIJL,ITOBOUT(5))=MOD(DEG*WDWAVE(KIJS:KIJL)+180._JWRB,360._JWRB) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN + IF (IPFGTBL(6) /= 0) THEN ! CONVERSION TO PERIOD DO IJ=KIJS,KIJL IF (DP(IJ) > 0.0_JWRB) THEN - BOUT(IJ,ITOBOUT(IR))=DP(IJ) + BOUT(IJ,ITOBOUT(6))=DP(IJ) ELSE - BOUT(IJ,ITOBOUT(IR))=ZMISS + BOUT(IJ,ITOBOUT(6))=ZMISS ENDIF ENDDO ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN + IF (IPFGTBL(7) /= 0) THEN !! if the numerical computation of TAU and CD changes, a similar !! modification has to be put in buildstress where the friction !! velocity is determined from U10 and CD. @@ -261,371 +251,308 @@ SUBROUTINE OUTBLOCK (KIJS, KIJL, MIJ, & & WSWAVE, UFRIC, Z0M, Z0B, CHRNCK, & & CHARNOCK, BETAHQ, CD=CDATM) - BOUT(KIJS:KIJL,ITOBOUT(IR))=MIN(CDATM(KIJS:KIJL), 0.01_JWRB) + BOUT(KIJS:KIJL,ITOBOUT(7))=MIN(CDATM(KIJS:KIJL), 0.01_JWRB) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=TAUW(KIJS:KIJL)/MAX(UFRIC(KIJS:KIJL)**2,EPSUS) + IF (IPFGTBL(8) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(8))=TAUW(KIJS:KIJL)/MAX(UFRIC(KIJS:KIJL)**2,EPSUS) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - CALL MEANSQS (XKMSS_CUTOFF, KIJS, KIJL, FL1, WAVNUM, UFRIC, COSWDIF, BOUT(KIJS,ITOBOUT(IR))) + IF (IPFGTBL(9) /= 0) THEN + CALL MEANSQS (XKMSS_CUTOFF, KIJS, KIJL, FL1, WAVNUM, UFRIC, COSWDIF, BOUT(:,ITOBOUT(9))) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=WSWAVE(KIJS:KIJL) + IF (IPFGTBL(10) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(10))=WSWAVE(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN + IF (IPFGTBL(11) /= 0) THEN ! WINDSEA SIGNIFICANT WAVE HEIGHT CONVERSION - BOUT(KIJS:KIJL,ITOBOUT(IR))=4._JWRB*SQRT(MAX(ESEA(KIJS:KIJL),0._JWRB)) + BOUT(KIJS:KIJL,ITOBOUT(11))=4._JWRB*SQRT(MAX(ESEA(KIJS:KIJL),0._JWRB)) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN + IF (IPFGTBL(12) /= 0) THEN ! TOTAL SWELL SIGNIFICANT WAVE HEIGHT CONVERSION - BOUT(KIJS:KIJL,ITOBOUT(IR))=4._JWRB*SQRT(MAX(ESWELL(KIJS:KIJL),0._JWRB)) + BOUT(KIJS:KIJL,ITOBOUT(12))=4._JWRB*SQRT(MAX(ESWELL(KIJS:KIJL),0._JWRB)) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN + IF (IPFGTBL(13) /= 0) THEN ! CONVERT DIRECTIONS TO DEGREES AND METEOROLOGICAL CONVENTION - BOUT(KIJS:KIJL,ITOBOUT(IR))=MOD(DEG*THWISEA(KIJS:KIJL)+180._JWRB,360._JWRB) + BOUT(KIJS:KIJL,ITOBOUT(13))=MOD(DEG*THWISEA(KIJS:KIJL)+180._JWRB,360._JWRB) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN + IF (IPFGTBL(14) /= 0) THEN ! CONVERT DIRECTIONS TO DEGREES AND METEOROLOGICAL CONVENTION - BOUT(KIJS:KIJL,ITOBOUT(IR))=MOD(DEG*THSWELL(KIJS:KIJL)+180._JWRB,360._JWRB) + BOUT(KIJS:KIJL,ITOBOUT(14))=MOD(DEG*THSWELL(KIJS:KIJL)+180._JWRB,360._JWRB) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN + IF (IPFGTBL(15) /= 0) THEN ! CONVERSION TO PERIOD DO IJ=KIJS,KIJL IF (FSEA(IJ) > 0._JWRB) THEN - BOUT(IJ,ITOBOUT(IR))=1._JWRB/FSEA(IJ) + BOUT(IJ,ITOBOUT(15))=1._JWRB/FSEA(IJ) ELSE - BOUT(IJ,ITOBOUT(IR))=ZMISS + BOUT(IJ,ITOBOUT(15))=ZMISS ENDIF ENDDO ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN + IF (IPFGTBL(16) /= 0) THEN ! CONVERSION TO PERIOD DO IJ=KIJS,KIJL IF (FSWELL(IJ) > 0._JWRB) THEN - BOUT(IJ,ITOBOUT(IR))=1._JWRB/FSWELL(IJ) + BOUT(IJ,ITOBOUT(16))=1._JWRB/FSWELL(IJ) ELSE - BOUT(IJ,ITOBOUT(IR))=ZMISS + BOUT(IJ,ITOBOUT(16))=ZMISS ENDIF ENDDO ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=ALTWH(KIJS:KIJL) + IF (IPFGTBL(17) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(17))=ALTWH(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=CALTWH(KIJS:KIJL) + IF (IPFGTBL(18) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(18))=CALTWH(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=RALTCOR(KIJS:KIJL) + IF (IPFGTBL(19) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(19))=RALTCOR(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - CALL MWP1 (KIJS, KIJL, FL2ND, BOUT(KIJS,ITOBOUT(IR))) + IF (IPFGTBL(20) /= 0) THEN + CALL MWP1 (KIJS, KIJL, FL2ND, BOUT(:,ITOBOUT(20))) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - CALL MWP2 (KIJS, KIJL, FL2ND, BOUT(KIJS,ITOBOUT(IR))) + IF (IPFGTBL(21) /= 0) THEN + CALL MWP2 (KIJS, KIJL, FL2ND, BOUT(:,ITOBOUT(21))) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - CALL WDIRSPREAD (KIJS, KIJL, FL2ND, EM, LLPEAKF, BOUT(KIJS,ITOBOUT(IR))) + IF (IPFGTBL(22) /= 0) THEN + CALL WDIRSPREAD (KIJS, KIJL, FL2ND, EM, LLPEAKF, BOUT(:,ITOBOUT(22))) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=P1SEA(KIJS:KIJL) + IF (IPFGTBL(23) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(23))=P1SEA(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=P1SWELL(KIJS:KIJL) + IF (IPFGTBL(24) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(24))=P1SWELL(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=P2SEA(KIJS:KIJL) + IF (IPFGTBL(25) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(25))=P2SEA(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=P2SWELL(KIJS:KIJL) + IF (IPFGTBL(26) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(26))=P2SWELL(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=SPRDSEA(KIJS:KIJL) + IF (IPFGTBL(27) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(27))=SPRDSEA(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=SPRDSWELL(KIJS:KIJL) + IF (IPFGTBL(28) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(28))=SPRDSWELL(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=C4(KIJS:KIJL) + IF (IPFGTBL(29) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(29))=C4(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=BF(KIJS:KIJL) + IF (IPFGTBL(30) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(30))=BF(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=QP(KIJS:KIJL) + IF (IPFGTBL(31) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(31))=QP(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=DEPTH(KIJS:KIJL) + IF (IPFGTBL(32) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(32))=DEPTH(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=HMAX(KIJS:KIJL) + IF (IPFGTBL(33) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(33))=HMAX(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=TMAX(KIJS:KIJL) + IF (IPFGTBL(34) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(34))=TMAX(KIJS:KIJL) ENDIF ! SURFACE STOKES DRIFT U and V - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=USTOKES(KIJS:KIJL) + IF (IPFGTBL(35) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(35))=USTOKES(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=VSTOKES(KIJS:KIJL) + IF (IPFGTBL(36) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(36))=VSTOKES(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=UCUR(KIJS:KIJL) + IF (IPFGTBL(37) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(37))=UCUR(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=VCUR(KIJS:KIJL) + IF (IPFGTBL(38) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(38))=VCUR(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=PHIEPS(KIJS:KIJL) + IF (IPFGTBL(39) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(39))=PHIEPS(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=PHIAW(KIJS:KIJL) + IF (IPFGTBL(40) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(40))=PHIAW(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=TAUOC(KIJS:KIJL) + IF (IPFGTBL(41) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(41))=TAUOC(KIJS:KIJL) ENDIF DO ITR=1,NTRAIN - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=4._JWRB*SQRT(MAX(EMTRAIN(KIJS:KIJL,ITR),0._JWRB)) + IF (IPFGTBL(42) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(42))=4._JWRB*SQRT(MAX(EMTRAIN(KIJS:KIJL,ITR),0._JWRB)) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=MOD(DEG*THTRAIN(KIJS:KIJL,ITR)+180._JWRB,360._JWRB) + IF (IPFGTBL(43) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(43))=MOD(DEG*THTRAIN(KIJS:KIJL,ITR)+180._JWRB,360._JWRB) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=PMTRAIN(KIJS:KIJL,ITR) + IF (IPFGTBL(44) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(44))=PMTRAIN(KIJS:KIJL,ITR) ENDIF ENDDO - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN + IF (IPFGTBL(45) /= 0) THEN IF (LWNEMOCOUSTRN) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=STRNMS(KIJS:KIJL) + BOUT(KIJS:KIJL,ITOBOUT(45))=STRNMS(KIJS:KIJL) ELSE - CALL CIMSSTRN (KIJS, KIJL, FL1, WAVNUM, DEPTH, CITHICK, BOUT(KIJS,ITOBOUT(IR))) + CALL CIMSSTRN (KIJS, KIJL, FL1, WAVNUM, DEPTH, CITHICK, BOUT(:,ITOBOUT(45))) ENDIF ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN + IF (IPFGTBL(46) /= 0) THEN CALL SE10MEAN (KIJS, KIJL, FL2ND, FLD1) - BOUT(KIJS:KIJL,ITOBOUT(IR))=4._JWRB*SQRT(MAX(FLD1(KIJS:KIJL),0._JWRB)) + BOUT(KIJS:KIJL,ITOBOUT(46))=4._JWRB*SQRT(MAX(FLD1(KIJS:KIJL),0._JWRB)) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=AIRD(KIJS:KIJL) + IF (IPFGTBL(47) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(47))=AIRD(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=WSTAR(KIJS:KIJL) + IF (IPFGTBL(48) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(48))=WSTAR(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=CICOVER(KIJS:KIJL) + IF (IPFGTBL(49) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(49))=CICOVER(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=CITHICK(KIJS:KIJL) + IF (IPFGTBL(50) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(50))=CITHICK(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=C3(KIJS:KIJL) + IF (IPFGTBL(51) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(51))=C3(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=NEMOSST(KIJS:KIJL) + IF (IPFGTBL(52) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(52))=NEMOSST(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=NEMOCICOVER(KIJS:KIJL) + IF (IPFGTBL(53) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(53))=NEMOCICOVER(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=NEMOCITHICK(KIJS:KIJL) + IF (IPFGTBL(54) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(54))=NEMOCITHICK(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=NEMOUCUR(KIJS:KIJL) + IF (IPFGTBL(55) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(55))=NEMOUCUR(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=NEMOVCUR(KIJS:KIJL) + IF (IPFGTBL(56) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(56))=NEMOVCUR(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0 .OR. IPFGTBL(IR+1) /= 0) THEN + IF (IPFGTBL(57) /= 0 .OR. IPFGTBL(58) /= 0) THEN CALL WEFLUX (KIJS, KIJL, FL1, CGROUP, & & NFRE, NANG, DFIM, DELTH, & & COSTH, SINTH, & & FLD1, FLD2) ENDIF - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=FLD1(KIJS:KIJL) + IF (IPFGTBL(57) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(57))=FLD1(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN + IF (IPFGTBL(58) /= 0) THEN ! CONVERT DIRECTIONS TO DEGREES AND METEOROLOGICAL CONVENTION - BOUT(KIJS:KIJL,ITOBOUT(IR))=MOD(DEG*FLD2(KIJS:KIJL)+180._JWRB,360._JWRB) + BOUT(KIJS:KIJL,ITOBOUT(58))=MOD(DEG*FLD2(KIJS:KIJL)+180._JWRB,360._JWRB) ENDIF DO IH=1,NTEWH - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - CALL SEBTMEAN (KIJS, KIJL, FL2ND, TEWH(IH-1), TEWH(IH), BOUT(KIJS,ITOBOUT(IR))) + IF (IPFGTBL(59) /= 0) THEN + !$loki inline + CALL SEBTMEAN (KIJS, KIJL, FL2ND, TEWH(IH-1), TEWH(IH), BOUT(:,ITOBOUT(59))) ! SIGNIFICANT WAVE HEIGHT CONVERSION - BOUT(KIJS:KIJL,ITOBOUT(IR))=4._JWRB*SQRT(MAX(BOUT(KIJS:KIJL,ITOBOUT(IR)),0._JWRB)) + BOUT(KIJS:KIJL,ITOBOUT(59))=4._JWRB*SQRT(MAX(BOUT(KIJS:KIJL,ITOBOUT(59)),0._JWRB)) ENDIF ENDDO - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=ETA_M(KIJS:KIJL) + IF (IPFGTBL(60) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(60))=ETA_M(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=R(KIJS:KIJL) + IF (IPFGTBL(61) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(61))=R(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=XNSLC(KIJS:KIJL) + IF (IPFGTBL(62) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(62))=XNSLC(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=TAUXD(KIJS:KIJL) + IF (IPFGTBL(63) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(63))=TAUXD(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=TAUYD(KIJS:KIJL) + IF (IPFGTBL(64) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(64))=TAUYD(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=TAUOCXD(KIJS:KIJL) + IF (IPFGTBL(65) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(65))=TAUOCXD(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=TAUOCYD(KIJS:KIJL) + IF (IPFGTBL(66) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(66))=TAUOCYD(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN + IF (IPFGTBL(67) /= 0) THEN ! !!! make the energy flux positive - BOUT(KIJS:KIJL,ITOBOUT(IR))=MAX(-PHIOCD(KIJS:KIJL),0.0_JWRB) + BOUT(KIJS:KIJL,ITOBOUT(67))=MAX(-PHIOCD(KIJS:KIJL),0.0_JWRB) ENDIF !! alternative ways to determine wave height extremes - IF (IPFGTBL(IR ) /= 0 .OR. IPFGTBL(IR+1) /= 0 .OR. & -& IPFGTBL(IR+2) /= 0 .OR. IPFGTBL(IR+3) /= 0 ) THEN + IF (IPFGTBL(67) /= 0 .OR. IPFGTBL(68) /= 0 .OR. & +& IPFGTBL(69) /= 0 .OR. IPFGTBL(70) /= 0 ) THEN CALL W_MAXH (KIJS, KIJL, FL1, DEPTH, WAVNUM, & & CMAX_F, HMAX_N, CMAX_ST, HMAX_ST, PHIST) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=CMAX_F(KIJS:KIJL) + IF (IPFGTBL(68) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(68))=CMAX_F(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=HMAX_N(KIJS:KIJL) + IF (IPFGTBL(69) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(69))=HMAX_N(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=CMAX_ST(KIJS:KIJL) + IF (IPFGTBL(70) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(70))=CMAX_ST(KIJS:KIJL) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=HMAX_ST(KIJS:KIJL) + IF (IPFGTBL(71) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(71))=HMAX_ST(KIJS:KIJL) ENDIF !! @@ -633,36 +560,31 @@ SUBROUTINE OUTBLOCK (KIJS, KIJL, MIJ, & ! COMPUTE OUTPUT EXTRA FIELDS ! add necessary code to compute the extra output fields !!!for testing - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - CALL CTCOR (KIJS, KIJL, FL1, BOUT(KIJS,ITOBOUT(IR))) + IF (IPFGTBL(72) /= 0) THEN + CALL CTCOR (KIJS, KIJL, FL1, BOUT(:,ITOBOUT(72))) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN + IF (IPFGTBL(73) /= 0) THEN XMODEL_CUTOFF=(ZPI*FR(NFRE))**2/G - CALL MEANSQS (XMODEL_CUTOFF, KIJS, KIJL, FL1, WAVNUM, UFRIC, COSWDIF, BOUT(KIJS,ITOBOUT(IR))) + CALL MEANSQS (XMODEL_CUTOFF, KIJS, KIJL, FL1, WAVNUM, UFRIC, COSWDIF, BOUT(:,ITOBOUT(73))) ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=0._JWRB + IF (IPFGTBL(74) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(74))=0._JWRB ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=0._JWRB + IF (IPFGTBL(75) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(75))=0._JWRB ENDIF - IR=IR+1 - IF (IPFGTBL(IR) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(IR))=0._JWRB + IF (IPFGTBL(76) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(76))=0._JWRB ENDIF ! APPLY SEA ICE MASK AND SEA MASK IF NECESSARY - CALL OUTSETWMASK (KIJS, KIJL, IODP(KIJS:KIJL), CICOVER, BOUT) + CALL OUTSETWMASK (KIJS, KIJL, IODP, CICOVER, BOUT) IF (LHOOK) CALL DR_HOOK('OUTBLOCK',1,ZHOOK_HANDLE) diff --git a/src/ecwam/outbs_loki_gpu.F90 b/src/ecwam/outbs_loki_gpu.F90 new file mode 100644 index 00000000..697cd7ed --- /dev/null +++ b/src/ecwam/outbs_loki_gpu.F90 @@ -0,0 +1,131 @@ +! (C) Copyright 1989- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +SUBROUTINE OUTBS_LOKI_GPU (MIJ, FL1, XLLWS, & + & WVPRPT, WVENVI, FF_NOW, INTFLDS, NEMO2WAM, & + & BOUT) +! ---------------------------------------------------------------------- + +!**** *OUTBS* - MODEL OUTPUT FROM BLOCK TO FILE, PRINTER AND COMMON. + +!* PURPOSE. +! -------- + +! CONTROL OUTPUT OF WAVE AND WIND FIELDS (except spectrum). + +!** INTERFACE. +! ---------- +! *CALL*OUTBS (MIJ, FL1, XLLWS, +! WVPRPT, WVENVI, FF_NOW, INTFLDS, NEMO2WAM, BOUT) +! *MIJ* - LAST FREQUENCY INDEX OF THE PROGNOSTIC RANGE. +! *FL1* - INPUT SPECTRUM. +! *XLLWS* - WINDSEA MASK FROM INPUT SOURCE TERM +! *WVENVI* - WAVE ENVIRONMENT (depth, currents,...) +! *FF_NOW* - FORCING FIELDS +! *INTFLDS* - INTEGRATED/DERIVED PARAMETERS +! *NEMO2WAM*- FIELDS FRON OCEAN MODEL to WAM +! *BOUT* - OUTPUT PARAMETERS BUFFER + + + +! EXTERNALS. +! ---------- + +! *OUTBLOCK* - GET ALL OUTPUT PARAMETERS +! +! METHOD. +! ------- + +! NONE. + +! REFERENCE. +! ---------- + +! NONE. + +! ---------------------------------------------------------------------- + USE PARKIND_WAVE, ONLY : JWIM, JWRB, JWRU + USE YOWDRVTYPE , ONLY : ENVIRONMENT, FREQUENCY, FORCING_FIELDS, & + & INTGT_PARAM_FIELDS, OCEAN2WAVE + + USE YOWCOUT , ONLY : JPPFLAG ,NIPRMOUT + USE YOWCOUP , ONLY : LLNORMWAMOUT + USE YOWGRID , ONLY : NPROMA_WAM, NCHNK + USE YOWPARAM , ONLY : NANG ,NFRE + + USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK + +! ---------------------------------------------------------------------- + IMPLICIT NONE + +#include "outblock.intfb.h" +#include "outwnorm.intfb.h" + + INTEGER(KIND=JWIM), DIMENSION(NPROMA_WAM, NCHNK), INTENT(IN) :: MIJ + REAL(KIND=JWRB), DIMENSION(NPROMA_WAM, NANG, NFRE, NCHNK), INTENT(IN) :: FL1 + REAL(KIND=JWRB), DIMENSION(NPROMA_WAM, NANG, NFRE, NCHNK), INTENT(IN) :: XLLWS + TYPE(FREQUENCY), INTENT(IN) :: WVPRPT + TYPE(ENVIRONMENT), INTENT(IN) :: WVENVI + TYPE(FORCING_FIELDS), INTENT(IN) :: FF_NOW + TYPE(INTGT_PARAM_FIELDS), INTENT(IN) :: INTFLDS + TYPE(OCEAN2WAVE), INTENT(IN) :: NEMO2WAM + REAL(KIND=JWRB), DIMENSION(NPROMA_WAM, NIPRMOUT, NCHNK), INTENT(OUT) :: BOUT + + + INTEGER(KIND=JWIM) :: M, IJ, ICHNK, KIJS, KIJL + + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + + LOGICAL :: LDREPROD + +! ---------------------------------------------------------------------- + +IF (LHOOK) CALL DR_HOOK('OUTBS_LOKI_GPU',0,ZHOOK_HANDLE) + +!* 1. COMPUTE MEAN PARAMETERS. +! ------------------------ + +! COMPUTE MEAN PARAMETERS +!$loki update_device + + CALL GSTATS(1502,0) +!$acc parallel loop gang default(present) copyin(NPROMA_WAM) copyout(BOUT) vector_length(NPROMA_WAM) + DO ICHNK = 1, NCHNK + CALL OUTBLOCK(1, NPROMA_WAM, MIJ(:,ICHNK), & + & FL1(:,:,:,ICHNK), XLLWS(:,:,:,ICHNK), & + & WVPRPT%WAVNUM(:,:,ICHNK), WVPRPT%CINV(:,:,ICHNK), WVPRPT%CGROUP(:,:,ICHNK), & + & WVENVI%DEPTH(:,ICHNK), WVENVI%UCUR(:,ICHNK), WVENVI%VCUR(:,ICHNK), & + & WVENVI%IODP(:,ICHNK), & + & INTFLDS%ALTWH(:,ICHNK), INTFLDS%CALTWH(:,ICHNK), INTFLDS%RALTCOR(:,ICHNK), & + & INTFLDS%USTOKES(:,ICHNK), INTFLDS%VSTOKES(:,ICHNK), INTFLDS%STRNMS(:,ICHNK), & + & INTFLDS%TAUXD(:,ICHNK), INTFLDS%TAUYD(:,ICHNK), INTFLDS%TAUOCXD(:,ICHNK), & + & INTFLDS%TAUOCYD(:,ICHNK), INTFLDS%TAUOC(:,ICHNK), INTFLDS%PHIOCD(:,ICHNK), & + & INTFLDS%PHIEPS(:,ICHNK), INTFLDS%PHIAW(:,ICHNK), & + & FF_NOW%AIRD(:,ICHNK), FF_NOW%WDWAVE(:,ICHNK), FF_NOW%CICOVER(:,ICHNK), & + & FF_NOW%WSWAVE(:,ICHNK), FF_NOW%WSTAR(:,ICHNK), & + & FF_NOW%UFRIC(:,ICHNK), FF_NOW%TAUW(:,ICHNK), & + & FF_NOW%Z0M(:,ICHNK), FF_NOW%Z0B(:,ICHNK), FF_NOW%CHRNCK(:,ICHNK), & + & FF_NOW%CITHICK(:,ICHNK), & + & NEMO2WAM%NEMOSST(:, ICHNK), NEMO2WAM%NEMOCICOVER(:,ICHNK), & + & NEMO2WAM%NEMOCITHICK(:, ICHNK), NEMO2WAM%NEMOUCUR(:,ICHNK), & + & NEMO2WAM%NEMOVCUR(:, ICHNK), & + & BOUT(:,:,ICHNK)) + ENDDO +!$acc end parallel loop + CALL GSTATS(1502,1) + +! PRINT OUT NORMS +!!!1 to do: decide if there are cases where we might want LDREPROD false + LDREPROD=.TRUE. + IF (LLNORMWAMOUT) CALL OUTWNORM(LDREPROD, BOUT) + + +IF (LHOOK) CALL DR_HOOK('OUTBS_LOKI_GPU',1,ZHOOK_HANDLE) + +END SUBROUTINE OUTBS_LOKI_GPU diff --git a/src/ecwam/outsetwmask.F90 b/src/ecwam/outsetwmask.F90 index 8e73de27..627c2eec 100644 --- a/src/ecwam/outsetwmask.F90 +++ b/src/ecwam/outsetwmask.F90 @@ -43,9 +43,9 @@ SUBROUTINE OUTSETWMASK (KIJS, KIJL, IODP, CICVR, BOUT) IMPLICIT NONE INTEGER(KIND=JWIM), INTENT(IN) :: KIJS, KIJL - INTEGER(KIND=JWIM), DIMENSION(KIJS:KIJL), INTENT(IN) :: IODP - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(IN) :: CICVR - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NIPRMOUT), INTENT(INOUT) :: BOUT + INTEGER(KIND=JWIM), DIMENSION(KIJL), INTENT(IN) :: IODP + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(IN) :: CICVR + REAL(KIND=JWRB), DIMENSION(KIJL,NIPRMOUT), INTENT(INOUT) :: BOUT INTEGER(KIND=JWIM) :: IJ, ITG, IR diff --git a/src/ecwam/outspec.F90 b/src/ecwam/outspec.F90 index 57e2d645..c3cf84c9 100644 --- a/src/ecwam/outspec.F90 +++ b/src/ecwam/outspec.F90 @@ -70,7 +70,7 @@ SUBROUTINE OUTSPEC (FL1, FF_NOW) #include "difdate.intfb.h" REAL(KIND=JWRB), DIMENSION(NPROMA_WAM, NANG, NFRE, NCHNK), INTENT(IN) :: FL1 - TYPE(FORCING_FIELDS), INTENT(INOUT) :: FF_NOW + TYPE(FORCING_FIELDS), INTENT(IN) :: FF_NOW INTEGER(KIND=JWIM) :: IJ, K, M diff --git a/src/ecwam/parmean.F90 b/src/ecwam/parmean.F90 index ea3a6128..58b4f659 100644 --- a/src/ecwam/parmean.F90 +++ b/src/ecwam/parmean.F90 @@ -69,9 +69,9 @@ SUBROUTINE PARMEAN (KIJS, KIJL, NPMAX, NPEAK, & IMPLICIT NONE INTEGER(KIND=JWIM), INTENT(IN) :: KIJS, KIJL, NPMAX - INTEGER(KIND=JWIM), INTENT(IN), DIMENSION(KIJS:KIJL) :: NPEAK - REAL(KIND=JWRB), INTENT(IN) :: SPEC(NANG,NFRE,NPMAX,KIJS:KIJL) - REAL(KIND=JWRB), INTENT(OUT), DIMENSION(KIJS:KIJL,0:NPMAX) :: ENE, DIR, PER + INTEGER(KIND=JWIM), INTENT(IN), DIMENSION(KIJL) :: NPEAK + REAL(KIND=JWRB), INTENT(IN) :: SPEC(NANG,NFRE,NPMAX,KIJL) + REAL(KIND=JWRB), INTENT(OUT), DIMENSION(KIJL,0:NPMAX) :: ENE, DIR, PER INTEGER(KIND=JWIM) :: IPK, IJ, K, M, IP diff --git a/src/ecwam/scosfl.F90 b/src/ecwam/scosfl.F90 index 9b0e881d..4989f741 100644 --- a/src/ecwam/scosfl.F90 +++ b/src/ecwam/scosfl.F90 @@ -61,14 +61,14 @@ SUBROUTINE SCOSFL (KIJS, KIJL, F, MM, MEANCOSFL) IMPLICIT NONE INTEGER(KIND=JWIM), INTENT(IN) :: KIJS, KIJL - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL, NANG, NFRE), INTENT(IN) :: F - INTEGER(KIND=JWIM), DIMENSION(KIJS:KIJL), INTENT(IN) :: MM - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(OUT) :: MEANCOSFL + REAL(KIND=JWRB), DIMENSION(KIJL, NANG, NFRE), INTENT(IN) :: F + INTEGER(KIND=JWIM), DIMENSION(KIJL), INTENT(IN) :: MM + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(OUT) :: MEANCOSFL INTEGER(KIND=JWIM) :: IJ, K REAL(KIND=JPHOOK) :: ZHOOK_HANDLE - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: MEANDIR, SI, CI + REAL(KIND=JWRB), DIMENSION(KIJL) :: MEANDIR, SI, CI ! ---------------------------------------------------------------------- diff --git a/src/ecwam/se10mean.F90 b/src/ecwam/se10mean.F90 index ae6d3d4c..a1429c62 100644 --- a/src/ecwam/se10mean.F90 +++ b/src/ecwam/se10mean.F90 @@ -59,8 +59,8 @@ SUBROUTINE SE10MEAN (KIJS, KIJL, FL1, E10) INTEGER(KIND=JWIM), INTENT(IN) :: KIJS, KIJL - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL, NANG, NFRE), INTENT(IN) :: FL1 - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(OUT) :: E10 + REAL(KIND=JWRB), DIMENSION(KIJL, NANG, NFRE), INTENT(IN) :: FL1 + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(OUT) :: E10 INTEGER(KIND=JWIM) :: IJ, M, K, MCUT @@ -69,7 +69,7 @@ SUBROUTINE SE10MEAN (KIJS, KIJL, FL1, E10) REAL(KIND=JWRB) :: DFCUT REAL(KIND=JPHOOK) :: ZHOOK_HANDLE REAL(KIND=JWRB), DIMENSION(NFRE) :: DFIMLOC - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: TEMP + REAL(KIND=JWRB), DIMENSION(KIJL) :: TEMP ! ---------------------------------------------------------------------- diff --git a/src/ecwam/sebtmean.F90 b/src/ecwam/sebtmean.F90 index 510dd558..44e1380f 100644 --- a/src/ecwam/sebtmean.F90 +++ b/src/ecwam/sebtmean.F90 @@ -61,9 +61,9 @@ SUBROUTINE SEBTMEAN (KIJS, KIJL, FL1, TB, TT, EBT) IMPLICIT NONE INTEGER(KIND=JWIM), INTENT(IN) :: KIJS, KIJL - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL, NANG, NFRE), INTENT(IN) :: FL1 + REAL(KIND=JWRB), DIMENSION(KIJL, NANG, NFRE), INTENT(IN) :: FL1 REAL(KIND=JWRB), INTENT(IN) :: TB, TT - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(OUT) :: EBT + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(OUT) :: EBT INTEGER(KIND=JWIM) :: IJ, M, K, MCUTB, MCUTT @@ -71,7 +71,7 @@ SUBROUTINE SEBTMEAN (KIJS, KIJL, FL1, TB, TT, EBT) REAL(KIND=JWRB) :: FCUTB, FCUTT, DFCUT, FBOT, FTOP, ZW REAL(KIND=JPHOOK) :: ZHOOK_HANDLE REAL(KIND=JWRB), DIMENSION(NFRE) :: DFIMLOC - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: TEMP + REAL(KIND=JWRB), DIMENSION(KIJL) :: TEMP ! ---------------------------------------------------------------------- diff --git a/src/ecwam/secspom.F90 b/src/ecwam/secspom.F90 index b5d65a05..10b78992 100644 --- a/src/ecwam/secspom.F90 +++ b/src/ecwam/secspom.F90 @@ -9,8 +9,8 @@ !-------------------------------------------------------------------- ! - SUBROUTINE SECSPOM(F1,F3,KIJS,KIJL,NFRE,NANG,NMAX,NDEPTH,DEPTHA, & - & DEPTHD,OMSTART,FRAC,MR,DFDTH,OMEGA,DEPTH, & + SUBROUTINE SECSPOM(F1,F3,KIJS,KIJL,NFRE,NANG,NDEPTH,DEPTHA, & + & DEPTHD,OMSTART,FRAC,MR,DFDTH,OMEGA,DEPTH, & & AKMEAN,TA,TB,TC_QL,TT_4M,TT_4P,IM_P,IM_M) ! !-------------------------------------------------------------------- @@ -84,6 +84,7 @@ SUBROUTINE SECSPOM(F1,F3,KIJS,KIJL,NFRE,NANG,NMAX,NDEPTH,DEPTHA, & !-------------------------------------------------------------------- USE PARKIND_WAVE, ONLY : JWIM, JWRB, JWRU + USE YOWTABL, ONLY : NMAX USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK @@ -91,26 +92,26 @@ SUBROUTINE SECSPOM(F1,F3,KIJS,KIJL,NFRE,NANG,NMAX,NDEPTH,DEPTHA, & IMPLICIT NONE - INTEGER(KIND=JWIM),INTENT(IN) :: KIJS,KIJL,NFRE,NANG,NMAX,NDEPTH,MR + INTEGER(KIND=JWIM),INTENT(IN) :: KIJS,KIJL,NFRE,NANG,NDEPTH,MR INTEGER(KIND=JWIM),DIMENSION(NFRE,NFRE), INTENT(IN) :: IM_P, IM_M REAL(KIND=JWRB), INTENT(IN) :: DEPTHA, DEPTHD, OMSTART, FRAC REAL(KIND=JWRB), DIMENSION(NFRE), INTENT(IN) :: OMEGA, DFDTH - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(IN) :: DEPTH, AKMEAN - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG,NFRE), INTENT(IN) :: F1 - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG,NFRE), INTENT(OUT) :: F3 + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(IN) :: DEPTH, AKMEAN + REAL(KIND=JWRB), DIMENSION(KIJL,NANG,NFRE), INTENT(IN) :: F1 + REAL(KIND=JWRB), DIMENSION(KIJL,NANG,NFRE), INTENT(OUT) :: F3 REAL(KIND=JWRB), DIMENSION(NDEPTH,NANG,NFRE,NFRE), INTENT(IN) :: TA,TB,TC_QL,TT_4M,TT_4P INTEGER(KIND=JWIM):: IJ, M, K, M1, K1, M2_M, M2_P, K2, MP, MM,L,ID INTEGER(KIND=JWIM), DIMENSION(NANG,NANG) :: LL - INTEGER(KIND=JWIM), DIMENSION(KIJS:KIJL) :: JD + INTEGER(KIND=JWIM), DIMENSION(KIJL) :: JD REAL(KIND=JWRB) :: OM0, OM0P, OM0M, OM0H, OM1 REAL(KIND=JWRB) :: T_4M, T_4P, DELM1, XD, X_MIN, OMRT, XLOGD, OMG5 REAL(KIND=JPHOOK) :: ZHOOK_HANDLE REAL(KIND=JWRB), DIMENSION(NMAX) :: OMEGA_EXT - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: XINCR, DF2KP, DF2KM, PSUM - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG,NMAX) :: F2 + REAL(KIND=JWRB), DIMENSION(KIJL) :: XINCR, DF2KP, DF2KM, PSUM + REAL(KIND=JWRB), DIMENSION(KIJL,NANG,NMAX) :: F2 LOGICAL :: LLSAMEDPTH diff --git a/src/ecwam/sep3tr.F90 b/src/ecwam/sep3tr.F90 index cc3bafff..94e1afca 100644 --- a/src/ecwam/sep3tr.F90 +++ b/src/ecwam/sep3tr.F90 @@ -70,7 +70,7 @@ SUBROUTINE SEP3TR (KIJS, KIJL, FL1, MIJ, WSWAVE, WDWAVE , COSWDIF, & & TH ,COSTH ,SINTH ,FRIC USE YOWICE , ONLY : FLMIN USE YOWPARAM , ONLY : NANG ,NFRE - USE YOWPCONS , ONLY : ZPI ,G ,EPSMIN + USE YOWPCONS , ONLY : ZPI ,G ,EPSMIN, NPMAX USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK @@ -81,28 +81,27 @@ SUBROUTINE SEP3TR (KIJS, KIJL, FL1, MIJ, WSWAVE, WDWAVE , COSWDIF, & #include "semean.intfb.h" INTEGER(KIND=JWIM), INTENT(IN) :: KIJS, KIJL - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG,NFRE), INTENT(IN) :: FL1 - INTEGER(KIND=JWIM), DIMENSION(KIJS:KIJL), INTENT(IN) :: MIJ - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(IN) :: WSWAVE, WDWAVE - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG), INTENT(IN) :: COSWDIF - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(IN) :: ESWELL, FSWELL, THSWELL - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(IN) :: FSEA - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG,NFRE), INTENT(INOUT) :: FLSW, SWM - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NTRAIN), INTENT(OUT) :: EMTRAIN, THTRAIN, PMTRAIN + REAL(KIND=JWRB), DIMENSION(KIJL,NANG,NFRE), INTENT(IN) :: FL1 + INTEGER(KIND=JWIM), DIMENSION(KIJL), INTENT(IN) :: MIJ + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(IN) :: WSWAVE, WDWAVE + REAL(KIND=JWRB), DIMENSION(KIJL,NANG), INTENT(IN) :: COSWDIF + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(IN) :: ESWELL, FSWELL, THSWELL + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(IN) :: FSEA + REAL(KIND=JWRB), DIMENSION(KIJL,NANG,NFRE), INTENT(INOUT) :: FLSW, SWM + REAL(KIND=JWRB), DIMENSION(KIJL,NTRAIN), INTENT(OUT) :: EMTRAIN, THTRAIN, PMTRAIN - INTEGER(KIND=JWIM), PARAMETER :: NPMAX=20 INTEGER(KIND=JWIM) :: NPMAX_LOC INTEGER(KIND=JWIM) :: IJ, M, K, IP INTEGER(KIND=JWIM) :: ISORT, I, IPLOC INTEGER(KIND=JWIM) :: IFL, IFH, ITHL, ITHH INTEGER(KIND=JWIM) :: KM, KP - INTEGER(KIND=JWIM), DIMENSION(KIJS:KIJL) :: NPEAK, NPK - INTEGER(KIND=JWIM), DIMENSION(KIJS:KIJL) :: FRINVMIJ - INTEGER(KIND=JWIM), DIMENSION(KIJS:KIJL) :: MMIN, MMAX - INTEGER(KIND=JWIM), DIMENSION(KIJS:KIJL) :: IPNOW - INTEGER(KIND=JWIM), DIMENSION(KIJS:KIJL,NTRAIN) :: IENERGY - INTEGER(KIND=JWIM), DIMENSION(KIJS:KIJL,NPMAX) :: NFRP, NTHP + INTEGER(KIND=JWIM), DIMENSION(KIJL) :: NPEAK, NPK + INTEGER(KIND=JWIM), DIMENSION(KIJL) :: FRINVMIJ + INTEGER(KIND=JWIM), DIMENSION(KIJL) :: MMIN, MMAX + INTEGER(KIND=JWIM), DIMENSION(KIJL) :: IPNOW + INTEGER(KIND=JWIM), DIMENSION(KIJL,NTRAIN) :: IENERGY + INTEGER(KIND=JWIM), DIMENSION(KIJL,NPMAX) :: NFRP, NTHP ! relative value above max swell value that is considered above noise level REAL(KIND=JWRB), PARAMETER :: XNOISELEVEL=0.005_JWRB @@ -116,18 +115,18 @@ SUBROUTINE SEP3TR (KIJS, KIJL, FL1, MIJ, WSWAVE, WDWAVE , COSWDIF, & REAL(KIND=JWRB) :: COSDIR, FRLIMIT REAL(KIND=JPHOOK) :: ZHOOK_HANDLE REAL(KIND=JWRB) :: FLLOWEST - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: ENEX, SUMETRAIN - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: ETT - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: ENMAX, FLNOISE - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG) :: SPRD - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,0:NPMAX) :: DIR, PER, ENE - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NTRAIN) :: TEMPDIR, TEMPPER, TEMPENE - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG,NFRE) :: FL, FLLOW + REAL(KIND=JWRB), DIMENSION(KIJL) :: ENEX, SUMETRAIN + REAL(KIND=JWRB), DIMENSION(KIJL) :: ETT + REAL(KIND=JWRB), DIMENSION(KIJL) :: ENMAX, FLNOISE + REAL(KIND=JWRB), DIMENSION(KIJL,NANG) :: SPRD + REAL(KIND=JWRB), DIMENSION(KIJL,0:NPMAX) :: DIR, PER, ENE + REAL(KIND=JWRB), DIMENSION(KIJL,NTRAIN) :: TEMPDIR, TEMPPER, TEMPENE + REAL(KIND=JWRB), DIMENSION(KIJL,NANG,NFRE) :: FL, FLLOW LOGICAL :: LLEPSMIN LOGICAL :: LLADDPART - LOGICAL, DIMENSION(KIJS:KIJL,NTRAIN) :: LPWSECTOR - LOGICAL, DIMENSION(KIJS:KIJL,NANG) :: LLCOSDIFF + LOGICAL, DIMENSION(KIJL,NTRAIN) :: LPWSECTOR + LOGICAL, DIMENSION(KIJL,NANG) :: LLCOSDIFF ! ---------------------------------------------------------------------- diff --git a/src/ecwam/sepwisw.F90 b/src/ecwam/sepwisw.F90 index 7e318b53..8055a14d 100644 --- a/src/ecwam/sepwisw.F90 +++ b/src/ecwam/sepwisw.F90 @@ -77,7 +77,7 @@ SUBROUTINE SEPWISW (KIJS, KIJL, MIJ, FL1, XLLWS, CINV, & USE YOWCOUT , ONLY : NTRAIN ,LLPARTITION USE YOWFRED , ONLY : FR ,TH ,FRIC ,OLDWSFC, ZPIFR - USE YOWPCONS , ONLY : G ,EPSMIN + USE YOWPCONS , ONLY : G ,EPSMIN, NPMAX USE YOWMAP , ONLY : CLDOMAIN USE YOWPARAM , ONLY : NANG ,NFRE @@ -95,15 +95,15 @@ SUBROUTINE SEPWISW (KIJS, KIJL, MIJ, FL1, XLLWS, CINV, & #include "wdirspread.intfb.h" INTEGER(KIND=JWIM), INTENT(IN) :: KIJS, KIJL - INTEGER(KIND=JWIM), DIMENSION(KIJS:KIJL), INTENT(IN) :: MIJ - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG,NFRE), INTENT(IN) :: FL1 - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG,NFRE), INTENT(IN) :: XLLWS - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NFRE), INTENT(IN) :: CINV - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(IN) :: UFRIC, WSWAVE, WDWAVE - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG), INTENT(IN) :: COSWDIF - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(OUT) :: ESWELL ,FSWELL ,THSWELL, P1SWELL, P2SWELL, SPRDSWELL - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(OUT) :: ESEA ,FSEA ,THWISEA, P1SEA , P2SEA , SPRDSEA - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NTRAIN), INTENT(OUT) :: EMTRAIN, THTRAIN, PMTRAIN + INTEGER(KIND=JWIM), DIMENSION(KIJL), INTENT(IN) :: MIJ + REAL(KIND=JWRB), DIMENSION(KIJL,NANG,NFRE), INTENT(IN) :: FL1 + REAL(KIND=JWRB), DIMENSION(KIJL,NANG,NFRE), INTENT(IN) :: XLLWS + REAL(KIND=JWRB), DIMENSION(KIJL,NFRE), INTENT(IN) :: CINV + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(IN) :: UFRIC, WSWAVE, WDWAVE + REAL(KIND=JWRB), DIMENSION(KIJL,NANG), INTENT(IN) :: COSWDIF + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(OUT) :: ESWELL ,FSWELL ,THSWELL, P1SWELL, P2SWELL, SPRDSWELL + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(OUT) :: ESEA ,FSEA ,THWISEA, P1SEA , P2SEA , SPRDSEA + REAL(KIND=JWRB), DIMENSION(KIJL,NTRAIN), INTENT(OUT) :: EMTRAIN, THTRAIN, PMTRAIN INTEGER(KIND=JWIM) :: IJ, K, M @@ -111,10 +111,10 @@ SUBROUTINE SEPWISW (KIJS, KIJL, MIJ, FL1, XLLWS, CINV, & REAL(KIND=JWRB) :: COEF REAL(KIND=JWRB) :: CHECKTA REAL(KIND=JPHOOK) :: ZHOOK_HANDLE - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: R - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NFRE) :: XINVWVAGE - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG) :: DIRCOEF - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG,NFRE) :: SWM, F1 + REAL(KIND=JWRB), DIMENSION(KIJL) :: R + REAL(KIND=JWRB), DIMENSION(KIJL,NFRE) :: XINVWVAGE + REAL(KIND=JWRB), DIMENSION(KIJL,NANG) :: DIRCOEF + REAL(KIND=JWRB), DIMENSION(KIJL,NANG,NFRE) :: SWM, F1 LOGICAL :: LLPEAKF diff --git a/src/ecwam/stat_nl.F90 b/src/ecwam/stat_nl.F90 index 8f3453cd..8a3a743a 100644 --- a/src/ecwam/stat_nl.F90 +++ b/src/ecwam/stat_nl.F90 @@ -55,8 +55,8 @@ SUBROUTINE STAT_NL(KIJS, KIJL, & #include "transf_r.intfb.h" INTEGER(KIND=JWIM), INTENT(IN) :: KIJS, KIJL - REAL(KIND=JWRB),DIMENSION(KIJS:KIJL), INTENT(IN) :: XM0, XK0, BF2, XNU, SIG_TH, DPTH - REAL(KIND=JWRB),DIMENSION(KIJS:KIJL), INTENT(OUT) :: C3, C4, ETA_M, R, C4_B, C4_DYN + REAL(KIND=JWRB),DIMENSION(KIJL), INTENT(IN) :: XM0, XK0, BF2, XNU, SIG_TH, DPTH + REAL(KIND=JWRB),DIMENSION(KIJL), INTENT(OUT) :: C3, C4, ETA_M, R, C4_B, C4_DYN REAL(KIND=JWRB), PARAMETER :: EPS = 0.0001_JWRB REAL(KIND=JWRB), PARAMETER :: RMIN = 0._JWRB @@ -79,7 +79,7 @@ SUBROUTINE STAT_NL(KIJS, KIJL, & REAL(KIND=JWRB) :: DELTA_2D,C_0,C_S_SQ,V_G,V_G_SQ,ZFAC,ZFAC1,ZFAC2 REAL(KIND=JWRB) :: XKAPPA1,ALPHA,XJ REAL(KIND=JWRB) :: ZEPSILON, ZSQREPSILON - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: TRANSF + REAL(KIND=JWRB), DIMENSION(KIJL) :: TRANSF !----------------------------------------------------------------------- diff --git a/src/ecwam/sthq.F90 b/src/ecwam/sthq.F90 index 5c00ff16..be4c6bc5 100644 --- a/src/ecwam/sthq.F90 +++ b/src/ecwam/sthq.F90 @@ -60,12 +60,12 @@ SUBROUTINE STHQ (KIJS, KIJL, FL1, THQ) IMPLICIT NONE INTEGER(KIND=JWIM), INTENT(IN) :: KIJS, KIJL - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG,NFRE), INTENT(IN) :: FL1 - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(OUT) :: THQ + REAL(KIND=JWRB), DIMENSION(KIJL,NANG,NFRE), INTENT(IN) :: FL1 + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(OUT) :: THQ INTEGER(KIND=JWIM) :: IJ, M, K REAL(KIND=JPHOOK) :: ZHOOK_HANDLE - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: TEMP, SI, CI + REAL(KIND=JWRB), DIMENSION(KIJL) :: TEMP, SI, CI ! ---------------------------------------------------------------------- diff --git a/src/ecwam/transf_bfi.F90 b/src/ecwam/transf_bfi.F90 index 6464e11b..73b2b636 100644 --- a/src/ecwam/transf_bfi.F90 +++ b/src/ecwam/transf_bfi.F90 @@ -35,6 +35,7 @@ REAL(KIND=JWRB) FUNCTION TRANSF_BFI(XK0,D,XNU,SIG_TH) !---------------------------------------------------------------------- IMPLICIT NONE +!$loki routine seq REAL(KIND=JWRB), INTENT(IN) :: XK0,D,XNU,SIG_TH REAL(KIND=JWRB), PARAMETER :: EPS=0.0001_JWRB diff --git a/src/ecwam/transf_r.F90 b/src/ecwam/transf_r.F90 index de5aba93..367a05dc 100644 --- a/src/ecwam/transf_r.F90 +++ b/src/ecwam/transf_r.F90 @@ -32,6 +32,7 @@ REAL(KIND=JWRB) FUNCTION TRANSF_R(XK0,D) !---------------------------------------------------------------------- IMPLICIT NONE +!$loki routine seq REAL(KIND=JWRB), PARAMETER :: EPS=0.0001_JWRB REAL(KIND=JPHOOK) :: ZHOOK_HANDLE diff --git a/src/ecwam/w_maxh.F90 b/src/ecwam/w_maxh.F90 index a6afa650..0b067c13 100644 --- a/src/ecwam/w_maxh.F90 +++ b/src/ecwam/w_maxh.F90 @@ -53,14 +53,14 @@ SUBROUTINE W_MAXH (KIJS, KIJL, F, DEPTH, WAVNUM, & INTEGER(KIND=JWIM), INTENT(IN) :: KIJS, KIJL -REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG,NFRE), INTENT(IN) :: F !! BLOCK OF SPECTRA -REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(IN) :: DEPTH !! DEPTH -REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NFRE), INTENT(IN) :: WAVNUM !! WAVE NUMBER -REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(OUT) :: CMAX_F !! MAXIMUM CREST H.- TIME (FORRISTALL) -REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(OUT) :: HMAX_N !! MAXIMUM WAVE H.- TIME (NAESS) -REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(OUT) :: CMAX_ST !! MAXIMUM CREST H.- SPACE-TIME (STQD) -REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(OUT) :: HMAX_ST !! MAXIMUM WAVE H.- SPACE-TIME (STQD) -REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(OUT) :: PHIST !! 1st minimum of the aotocovariance function +REAL(KIND=JWRB), DIMENSION(KIJL,NANG,NFRE), INTENT(IN) :: F !! BLOCK OF SPECTRA +REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(IN) :: DEPTH !! DEPTH +REAL(KIND=JWRB), DIMENSION(KIJL,NFRE), INTENT(IN) :: WAVNUM !! WAVE NUMBER +REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(OUT) :: CMAX_F !! MAXIMUM CREST H.- TIME (FORRISTALL) +REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(OUT) :: HMAX_N !! MAXIMUM WAVE H.- TIME (NAESS) +REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(OUT) :: CMAX_ST !! MAXIMUM CREST H.- SPACE-TIME (STQD) +REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(OUT) :: HMAX_ST !! MAXIMUM WAVE H.- SPACE-TIME (STQD) +REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(OUT) :: PHIST !! 1st minimum of the aotocovariance function ! LOCAL VARIABLES. ! ---------------- @@ -74,7 +74,7 @@ SUBROUTINE W_MAXH (KIJS, KIJL, F, DEPTH, WAVNUM, & REAL(KIND=JWRB), PARAMETER :: TOL = 0.01_JWRB ! GOLDEN SEARCH TOLERANCE INTEGER(KIND=JWIM) :: IJ, IT, M, K -INTEGER(KIND=JWIM), DIMENSION(KIJS:KIJL) :: K_THMAX +INTEGER(KIND=JWIM), DIMENSION(KIJL) :: K_THMAX REAL(KIND=JPHOOK) :: ZHOOK_HANDLE REAL(KIND=JWRB) :: Z0, RNW, ALFA, BETA, URSN, STEEP, WNUM1 @@ -82,17 +82,17 @@ SUBROUTINE W_MAXH (KIJS, KIJL, F, DEPTH, WAVNUM, & REAL(KIND=JWRB) :: ZEPSILON REAL(KIND=JWRB) :: DELT25, EM, XK2_DFIM, XK_ZPI_DFIM REAL(KIND=JWRB), DIMENSION(4) :: TLGS, ACFS -REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: HS, RLX, RLY, AXT, AYT, AXY -REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: WMDX, WMDY ! SPACE TIME EXTREME OVER WMDX x WMDY m**2 AREA -REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: WMDUR_TD ! TIME EXTREME OVER WMDUR_TD for TIME DOMAIN in sec. -REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: WMDUR_ST ! TIME EXTREME OVER WMDUR_ST for SPACE TIME in sec. -REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: FMAX -REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: ACF, T1, T2, EMEAN -REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: TEMP, TEMP_X, TEMP_Y, TEMP_X2, TEMP_Y2, TEMP_XY -REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: RNI, RMU +REAL(KIND=JWRB), DIMENSION(KIJL) :: HS, RLX, RLY, AXT, AYT, AXY +REAL(KIND=JWRB), DIMENSION(KIJL) :: WMDX, WMDY ! SPACE TIME EXTREME OVER WMDX x WMDY m**2 AREA +REAL(KIND=JWRB), DIMENSION(KIJL) :: WMDUR_TD ! TIME EXTREME OVER WMDUR_TD for TIME DOMAIN in sec. +REAL(KIND=JWRB), DIMENSION(KIJL) :: WMDUR_ST ! TIME EXTREME OVER WMDUR_ST for SPACE TIME in sec. +REAL(KIND=JWRB), DIMENSION(KIJL) :: FMAX +REAL(KIND=JWRB), DIMENSION(KIJL) :: ACF, T1, T2, EMEAN +REAL(KIND=JWRB), DIMENSION(KIJL) :: TEMP, TEMP_X, TEMP_Y, TEMP_X2, TEMP_Y2, TEMP_XY +REAL(KIND=JWRB), DIMENSION(KIJL) :: RNI, RMU REAL(KIND=JWRB), DIMENSION(NFRE) :: OMEGA -REAL(KIND=JWRB), DIMENSION(KIJS:KIJL, NANG) :: CX, CY, CX2, CY2, CXCY -REAL(KIND=JWRB), DIMENSION(KIJS:KIJL, NFRE) :: TEMPDFIM +REAL(KIND=JWRB), DIMENSION(KIJL, NANG) :: CX, CY, CX2, CY2, CXCY +REAL(KIND=JWRB), DIMENSION(KIJL, NFRE) :: TEMPDFIM ! ---------------------------------------------------------------------------- @@ -207,34 +207,34 @@ SUBROUTINE W_MAXH (KIJS, KIJL, F, DEPTH, WAVNUM, & ENDIF ENDDO -WHERE (EMEAN > ZEPSILON) - AXY = MIN(AXY/SQRT(RLX*RLY), 1._JWRB) - AXT = MIN(AXT/(ZPI*SQRT(RLX*T2)), 1._JWRB) - AYT = MIN(AYT/(ZPI*SQRT(RLY*T2)), 1._JWRB) - RLX = ZPI*SQRT(EMEAN/RLX) - RLY = ZPI*SQRT(EMEAN/RLY) - RNI = SQRT(MAX(EMEAN*T2/T1**2 - 1._JWRB,ZEPSILON)) - RMU = (ZPI*T1)**2*(1._JWRB-RNI+RNI**2)/(G*EMEAN**THREEHALF) - T1 = MIN(MAX(EMEAN/T1,TMIN),TMAX) - T2 = MIN(MAX(SQRT(EMEAN/T2),TMIN),TMAX) +WHERE (EMEAN(KIJS:KIJL) > ZEPSILON) + AXY(KIJS:KIJL) = MIN(AXY(KIJS:KIJL)/SQRT(RLX(KIJS:KIJL)*RLY(KIJS:KIJL)), 1._JWRB) + AXT(KIJS:KIJL) = MIN(AXT(KIJS:KIJL)/(ZPI*SQRT(RLX(KIJS:KIJL)*T2(KIJS:KIJL))), 1._JWRB) + AYT(KIJS:KIJL) = MIN(AYT(KIJS:KIJL)/(ZPI*SQRT(RLY(KIJS:KIJL)*T2(KIJS:KIJL))), 1._JWRB) + RLX(KIJS:KIJL) = ZPI*SQRT(EMEAN(KIJS:KIJL)/RLX(KIJS:KIJL)) + RLY(KIJS:KIJL) = ZPI*SQRT(EMEAN(KIJS:KIJL)/RLY(KIJS:KIJL)) + RNI(KIJS:KIJL) = SQRT(MAX(EMEAN(KIJS:KIJL)*T2(KIJS:KIJL)/T1(KIJS:KIJL)**2 - 1._JWRB,ZEPSILON)) + RMU(KIJS:KIJL) = (ZPI*T1(KIJS:KIJL))**2*(1._JWRB-RNI+RNI**2)/(G*EMEAN(KIJS:KIJL)**THREEHALF) + T1(KIJS:KIJL) = MIN(MAX(EMEAN(KIJS:KIJL)/T1(KIJS:KIJL),TMIN),TMAX) + T2(KIJS:KIJL) = MIN(MAX(SQRT(EMEAN(KIJS:KIJL)/T2(KIJS:KIJL)),TMIN),TMAX) !!! WMDUR_TD = XNWVP*T2 - WMDX = MAX(RLX,WVLMIN) - WMDY = MAX(RLY,WVLMIN) - WMDUR_ST = XNWVP*T2 + WMDX(KIJS:KIJL) = MAX(RLX(KIJS:KIJL),WVLMIN) + WMDY(KIJS:KIJL) = MAX(RLY(KIJS:KIJL),WVLMIN) + WMDUR_ST(KIJS:KIJL) = XNWVP*T2(KIJS:KIJL) ELSEWHERE - AXY = 0._JWRB - AXT = 0._JWRB - AYT = 0._JWRB - RLX = 1._JWRB - RLY = 1._JWRB - RNI = 0._JWRB - RMU = 0._JWRB - T1 = TMIN - T2 = TMIN + AXY(KIJS:KIJL) = 0._JWRB + AXT(KIJS:KIJL) = 0._JWRB + AYT(KIJS:KIJL) = 0._JWRB + RLX(KIJS:KIJL) = 1._JWRB + RLY(KIJS:KIJL) = 1._JWRB + RNI(KIJS:KIJL) = 0._JWRB + RMU(KIJS:KIJL) = 0._JWRB + T1(KIJS:KIJL) = TMIN + T2(KIJS:KIJL) = TMIN !!! WMDUR_TD = XNWVP*T2 - WMDX = MAX(RLX,WVLMIN) - WMDY = MAX(RLY,WVLMIN) - WMDUR_ST = XNWVP*T2 + WMDX(KIJS:KIJL) = MAX(RLX(KIJS:KIJL),WVLMIN) + WMDY(KIJS:KIJL) = MAX(RLY(KIJS:KIJL),WVLMIN) + WMDUR_ST(KIJS:KIJL) = XNWVP*T2(KIJS:KIJL) END WHERE ! MIN OF AUTOCOVARIANCE FUNCTION VIA GOLDEN RATIO SEARCH diff --git a/src/ecwam/w_mode_st.F90 b/src/ecwam/w_mode_st.F90 index f6b03cb9..adb3bc0c 100644 --- a/src/ecwam/w_mode_st.F90 +++ b/src/ecwam/w_mode_st.F90 @@ -33,6 +33,7 @@ REAL(KIND=JWRB) FUNCTION W_MODE_ST (RN3, RN2, RN1) ! ---------------------------------------------------------------------- IMPLICIT NONE +!$loki routine seq ! INTERFACE VARIABLES. ! ! -------------------- ! diff --git a/src/ecwam/wamintgr_loki_gpu.F90 b/src/ecwam/wamintgr_loki_gpu.F90 index 8aace0ff..df094375 100644 --- a/src/ecwam/wamintgr_loki_gpu.F90 +++ b/src/ecwam/wamintgr_loki_gpu.F90 @@ -188,8 +188,6 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & ENDIF -LUPDATE_GPU_GLOBALS = .FALSE. - IF (LHOOK) CALL DR_HOOK('WAMINTGR',1,ZHOOK_HANDLE) END SUBROUTINE WAMINTGR_LOKI_GPU diff --git a/src/ecwam/wamodel.F90 b/src/ecwam/wamodel.F90 index 84769b0a..d028633d 100644 --- a/src/ecwam/wamodel.F90 +++ b/src/ecwam/wamodel.F90 @@ -74,7 +74,7 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & & IDELWI ,IREST ,IDELRES ,IDELINT , & & CDTBC ,IDELBC , & & IASSI ,MARSTYPE , & - & LLSOURCE ,LANAONLY ,LFRSTFLD ,IREFDATE + & LLSOURCE ,LANAONLY ,LFRSTFLD ,IREFDATE, LUPDATE_GPU_GLOBALS USE YOWSPEC, ONLY : NBLKS ,NBLKE USE YOWTEST , ONLY : IU06 USE YOWTEXT , ONLY : ICPLEN ,CPATH ,CWI ,LRESTARTED @@ -103,7 +103,6 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & #include "iwam_get_unit.intfb.h" #include "incdate.intfb.h" #include "outbc.intfb.h" -#include "outbs.intfb.h" #include "outspec.intfb.h" #include "outstep0.intfb.h" #include "savspec.intfb.h" @@ -114,8 +113,10 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & #include "writsta.intfb.h" #ifdef WAM_GPU +#include "outbs_loki_gpu.intfb.h" #include "wamintgr_loki_gpu.intfb.h" #else +#include "outbs.intfb.h" #include "wamintgr.intfb.h" #endif @@ -140,7 +141,7 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & TYPE(MIJ_TYPE) :: MIJ REAL(KIND=JPHOOK) :: ZHOOK_HANDLE - REAL(KIND=JWRB), DIMENSION(NPROMA_WAM, MAX(NIPRMOUT,1), NCHNK) :: BOUT + REAL(KIND=JWRB), DIMENSION(NPROMA_WAM, NIPRMOUT, NCHNK) :: BOUT CHARACTER(LEN= 2) :: MARSTYPEBAK CHARACTER(LEN=14) :: CDATEWH, CZERO @@ -327,26 +328,38 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & CALL FF_NOW%SYNC_HOST_RDONLY() CALL OUTWPSP (VARS_4D%FL1, FF_NOW) + + CALL VARS_4D%F_FL1%SYNC_DEVICE_RDWR() + CALL FF_NOW%SYNC_DEVICE_RDWR(AIRD=.TRUE., WDWAVE=.TRUE., CICOVER=.TRUE., WSWAVE=.TRUE., & + & WSTAR=.TRUE., UFRIC=.TRUE., TAUW=.TRUE., TAUWDIR=.TRUE., Z0M=.TRUE., Z0B=.TRUE., & + & CHRNCK=.TRUE., CITHICK=.TRUE., USTRA=.TRUE., VSTRA=.TRUE.) ENDIF - CALL WVPRPT%SYNC_HOST_RDWR() - CALL WVENVI%SYNC_HOST_RDWR() - CALL FF_NOW%SYNC_HOST_RDWR() - CALL FF_NEXT%SYNC_HOST_RDWR() - CALL WAM2NEMO%SYNC_HOST_RDWR() - CALL INTFLDS%SYNC_HOST_RDWR() - CALL VARS_4D%SYNC_HOST_RDWR() - CALL MIJ%SYNC_HOST_RDWR() - CALL BLK2GLO%SYNC_HOST_RDWR() ! 1.6 COMPUTE OUTPUT PARAMETERS FIELDS AND PRINT OUT NORMS ! ---------------------------------------------------- IF ( (CDTINTT == CDTPRO .OR. LRST) .AND. NIPRMOUT > 0 ) THEN + +#ifdef WAM_GPU + CALL OUTBS_LOKI_GPU (MIJ%PTR, VARS_4D%FL1, VARS_4D%XLLWS, & + & WVPRPT, WVENVI, FF_NOW, INTFLDS, NEMO2WAM, & + & BOUT) +#else CALL OUTBS (MIJ%PTR, VARS_4D%FL1, VARS_4D%XLLWS, & & WVPRPT, WVENVI, FF_NOW, INTFLDS, NEMO2WAM, & & BOUT) +#endif ENDIF + CALL WVPRPT%SYNC_HOST_RDWR() + CALL WVENVI%SYNC_HOST_RDWR() + CALL FF_NOW%SYNC_HOST_RDWR() + CALL FF_NEXT%SYNC_HOST_RDWR() + CALL WAM2NEMO%SYNC_HOST_RDWR() + CALL INTFLDS%SYNC_HOST_RDWR() + CALL VARS_4D%SYNC_HOST_RDWR() + CALL MIJ%SYNC_HOST_RDWR() + CALL BLK2GLO%SYNC_HOST_RDWR() !* 1.7 ONE PROPAGATION TIMESTEP DONE ! ----------------------------- @@ -561,6 +574,7 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & ENDIF + LUPDATE_GPU_GLOBALS = .FALSE. !* BRANCHING BACK TO 1.0 FOR NEXT PROPAGATION STEP. ENDDO ADVECTION diff --git a/src/ecwam/wdirspread.F90 b/src/ecwam/wdirspread.F90 index fc3c4ba4..8bd8568b 100644 --- a/src/ecwam/wdirspread.F90 +++ b/src/ecwam/wdirspread.F90 @@ -55,6 +55,7 @@ SUBROUTINE WDIRSPREAD (KIJS, KIJL, F, EMEAN, LLPEAKF, WDIRSPRD) USE PARKIND_WAVE, ONLY : JWIM, JWRB, JWRU USE YOWFRED , ONLY : FR ,DFIM ,DELTH ,WETAIL + USE YOWFRED , ONLY : DELTH ,TH ,COSTH ,SINTH !... needed for Loki USE YOWPARAM , ONLY : NANG ,NFRE USE YOWPCONS , ONLY : EPSMIN @@ -68,18 +69,18 @@ SUBROUTINE WDIRSPREAD (KIJS, KIJL, F, EMEAN, LLPEAKF, WDIRSPRD) #include "scosfl.intfb.h" INTEGER(KIND=JWIM), INTENT(IN) :: KIJS, KIJL - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG,NFRE), INTENT(IN) :: F - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(IN) :: EMEAN + REAL(KIND=JWRB), DIMENSION(KIJL,NANG,NFRE), INTENT(IN) :: F + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(IN) :: EMEAN LOGICAL, INTENT(IN) :: LLPEAKF - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(OUT) :: WDIRSPRD + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(OUT) :: WDIRSPRD INTEGER(KIND=JWIM) :: IJ, M - INTEGER(KIND=JWIM), DIMENSION(KIJS:KIJL) :: IFRINDEX + INTEGER(KIND=JWIM), DIMENSION(KIJL) :: IFRINDEX REAL(KIND=JWRB) :: COEF_FR, ONE REAL(KIND=JPHOOK) :: ZHOOK_HANDLE - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: TEMP - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NFRE) :: F1D + REAL(KIND=JWRB), DIMENSION(KIJL) :: TEMP + REAL(KIND=JWRB), DIMENSION(KIJL,NFRE) :: F1D LOGICAL :: LL_HALT_INVALID @@ -89,8 +90,10 @@ SUBROUTINE WDIRSPREAD (KIJS, KIJL, F, EMEAN, LLPEAKF, WDIRSPRD) ! Turn off Floating-Point-Exceptions in this scope to avoid FPE_INVALID in optimized code ! with branch prediction. It is safe to do so as DIV_BY_ZERO is protected. +#ifndef WAM_GPU CALL IEEE_GET_HALTING_MODE(IEEE_INVALID, LL_HALT_INVALID) IF (LL_HALT_INVALID) CALL IEEE_SET_HALTING_MODE(IEEE_INVALID, .FALSE.) +#endif !* 1. INITIALIZE ARRAYS ! ----------------- @@ -105,6 +108,7 @@ SUBROUTINE WDIRSPREAD (KIJS, KIJL, F, EMEAN, LLPEAKF, WDIRSPRD) IF(LLPEAKF) THEN ! COMPUTATION IS BASED ON THE PEAK FREQUENCY CALL PEAKFRI (KIJS, KIJL, F, IFRINDEX, TEMP, F1D) + !$loki inline CALL SCOSFL (KIJS, KIJL, F, IFRINDEX, WDIRSPRD) DO IJ = KIJS,KIJL IF(TEMP(IJ) > 0.0_JWRB) THEN @@ -118,6 +122,7 @@ SUBROUTINE WDIRSPREAD (KIJS, KIJL, F, EMEAN, LLPEAKF, WDIRSPRD) ! COMPUTATION IS BASED ON THE WHOLE FREQUENCY RANGE DO M = 1,NFRE IFRINDEX=M + !$loki inline CALL SCOSFL (KIJS, KIJL, F, IFRINDEX, TEMP) DO IJ = KIJS,KIJL WDIRSPRD(IJ) = WDIRSPRD(IJ) + TEMP(IJ)*DFIM(M) @@ -148,7 +153,9 @@ SUBROUTINE WDIRSPREAD (KIJS, KIJL, F, EMEAN, LLPEAKF, WDIRSPRD) WDIRSPRD(IJ) = SQRT(2.0_JWRB*(ONE-WDIRSPRD(IJ))) ENDDO +#ifndef WAM_GPU IF (LL_HALT_INVALID) CALL IEEE_SET_HALTING_MODE(IEEE_INVALID, .TRUE.) +#endif IF (LHOOK) CALL DR_HOOK('WDIRSPREAD',1,ZHOOK_HANDLE) diff --git a/src/ecwam/weflux.F90 b/src/ecwam/weflux.F90 index 4b4ce12c..e32eb7c2 100644 --- a/src/ecwam/weflux.F90 +++ b/src/ecwam/weflux.F90 @@ -72,21 +72,21 @@ SUBROUTINE WEFLUX (KIJS, KIJL, FL1, CGROUP, & IMPLICIT NONE INTEGER(KIND=JWIM), INTENT(IN) :: KIJS, KIJL - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NANG,NFRE), INTENT(IN) :: FL1 - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NFRE), INTENT(IN) :: CGROUP + REAL(KIND=JWRB), DIMENSION(KIJL,NANG,NFRE), INTENT(IN) :: FL1 + REAL(KIND=JWRB), DIMENSION(KIJL,NFRE), INTENT(IN) :: CGROUP INTEGER(KIND=JWIM), INTENT(IN) :: NFRE, NANG REAL(KIND=JWRB), INTENT(IN) :: DELTH REAL(KIND=JWRB), DIMENSION(NFRE), INTENT(IN) :: DFIM REAL(KIND=JWRB), DIMENSION(NANG), INTENT(IN) :: COSTH, SINTH - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(OUT) :: WEFMAG, WEFDIR + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(OUT) :: WEFMAG, WEFDIR INTEGER(KIND=JWIM) :: IJ, M, K REAL(KIND=JWRB) :: ROG, FCG, DELT REAL(KIND=JPHOOK) :: ZHOOK_HANDLE - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: TEMP, TEMPX, TEMPY - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: WEFX, WEFY + REAL(KIND=JWRB), DIMENSION(KIJL) :: TEMP, TEMPX, TEMPY + REAL(KIND=JWRB), DIMENSION(KIJL) :: WEFX, WEFY ! ! ---------------------------------------------------------------------- diff --git a/src/ecwam/yowpcons.F90 b/src/ecwam/yowpcons.F90 index cd646646..3ded42d5 100644 --- a/src/ecwam/yowpcons.F90 +++ b/src/ecwam/yowpcons.F90 @@ -66,6 +66,7 @@ MODULE YOWPCONS REAL(KIND=JWRB), PARAMETER :: CDMAX=0.0025_JWRB REAL(KIND=JWRB), PARAMETER :: FM2FP=0.9_JWRB + INTEGER(KIND=JWIM), PARAMETER :: NPMAX=20 !* VARIABLE. TYPE. PURPOSE. ! --------- ------- -------- From ee19884e3ef19d0207bcbc1163e7d54df5fe80c5 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Wed, 22 May 2024 14:17:38 +0000 Subject: [PATCH 03/17] Move data transfers out of ADVECTION loop --- src/ecwam/wamodel.F90 | 104 ++++++++++++++++++++++++------------------ 1 file changed, 59 insertions(+), 45 deletions(-) diff --git a/src/ecwam/wamodel.F90 b/src/ecwam/wamodel.F90 index d028633d..ed9e6cc5 100644 --- a/src/ecwam/wamodel.F90 +++ b/src/ecwam/wamodel.F90 @@ -201,29 +201,29 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & !* 1. ADVECTION/PHYSICS TIME LOOP. ! ---------------------------- + CALL VARS_4D%SYNC_DEVICE_RDWR() + CALL BLK2GLO%SYNC_DEVICE_RDONLY() + CALL FF_NOW%SYNC_DEVICE_RDWR(AIRD=.TRUE., WDWAVE=.TRUE., CICOVER=.TRUE., WSWAVE=.TRUE., & + & WSTAR=.TRUE., UFRIC=.TRUE., TAUW=.TRUE., TAUWDIR=.TRUE., Z0M=.TRUE., Z0B=.TRUE., & + & CHRNCK=.TRUE., CITHICK=.TRUE., USTRA=.TRUE., VSTRA=.TRUE.) + CALL FF_NEXT%SYNC_DEVICE_RDONLY(AIRD=.TRUE., WDWAVE=.TRUE., CICOVER=.TRUE., WSWAVE=.TRUE., & + & WSTAR=.TRUE., UFRIC=.TRUE., TAUW=.TRUE., TAUWDIR=.TRUE., Z0M=.TRUE., Z0B=.TRUE., & + & CHRNCK=.TRUE., CITHICK=.TRUE., USTRA=.TRUE., VSTRA=.TRUE.) + CALL WVPRPT%SYNC_DEVICE_RDWR() + CALL WVENVI%SYNC_DEVICE_RDWR(DEPTH=.TRUE., DELLAM1=.TRUE., COSPHM1=.TRUE., UCUR=.TRUE., VCUR=.TRUE., & + & EMAXDPT=.TRUE., IOBND=.TRUE., IODP=.TRUE.) + CALL WAM2NEMO%SYNC_DEVICE_RDWR(NEMOUSTOKES=.TRUE., NEMOVSTOKES=.TRUE., NEMOSTRN=.TRUE., & + & NPHIEPS=.TRUE., NTAUOC=.TRUE., NSWH=.TRUE., NMWP=.TRUE., NEMOTAUX=.TRUE., & + & NEMOTAUY=.TRUE., NEMOWSWAVE=.TRUE., NEMOPHIF=.TRUE.) + CALL INTFLDS%SYNC_DEVICE_RDWR(WSEMEAN=.TRUE., WSFMEAN=.TRUE., USTOKES=.TRUE., & + & VSTOKES=.TRUE., STRNMS=.TRUE., TAUXD=.TRUE., TAUYD=.TRUE., TAUOCXD=.TRUE., & + & TAUOCYD=.TRUE., TAUOC=.TRUE., PHIOCD=.TRUE., PHIEPS=.TRUE., PHIAW=.TRUE.) + CALL MIJ%SYNC_DEVICE_RDWR() + ADVECTION : DO KADV = 1,NADV !* 1.1 FIX END DATE OF THIS PROPAGATION STEP AND OUTPUT TIMES. ! ------------------------------------------------------- - CALL VARS_4D%SYNC_DEVICE_RDWR() - CALL BLK2GLO%SYNC_DEVICE_RDONLY() - CALL FF_NOW%SYNC_DEVICE_RDWR(AIRD=.TRUE., WDWAVE=.TRUE., CICOVER=.TRUE., WSWAVE=.TRUE., & - & WSTAR=.TRUE., UFRIC=.TRUE., TAUW=.TRUE., TAUWDIR=.TRUE., Z0M=.TRUE., Z0B=.TRUE., & - & CHRNCK=.TRUE., CITHICK=.TRUE., USTRA=.TRUE., VSTRA=.TRUE.) - CALL FF_NEXT%SYNC_DEVICE_RDONLY(AIRD=.TRUE., WDWAVE=.TRUE., CICOVER=.TRUE., WSWAVE=.TRUE., & - & WSTAR=.TRUE., UFRIC=.TRUE., TAUW=.TRUE., TAUWDIR=.TRUE., Z0M=.TRUE., Z0B=.TRUE., & - & CHRNCK=.TRUE., CITHICK=.TRUE., USTRA=.TRUE., VSTRA=.TRUE.) - CALL WVPRPT%SYNC_DEVICE_RDWR() - CALL WVENVI%SYNC_DEVICE_RDWR(DEPTH=.TRUE., DELLAM1=.TRUE., COSPHM1=.TRUE., UCUR=.TRUE., VCUR=.TRUE., & - & EMAXDPT=.TRUE., IOBND=.TRUE., IODP=.TRUE.) - CALL WAM2NEMO%SYNC_DEVICE_RDWR(NEMOUSTOKES=.TRUE., NEMOVSTOKES=.TRUE., NEMOSTRN=.TRUE., & - & NPHIEPS=.TRUE., NTAUOC=.TRUE., NSWH=.TRUE., NMWP=.TRUE., NEMOTAUX=.TRUE., & - & NEMOTAUY=.TRUE., NEMOWSWAVE=.TRUE., NEMOPHIF=.TRUE.) - CALL INTFLDS%SYNC_DEVICE_RDWR(WSEMEAN=.TRUE., WSFMEAN=.TRUE., USTOKES=.TRUE., & - & VSTOKES=.TRUE., STRNMS=.TRUE., TAUXD=.TRUE., TAUYD=.TRUE., TAUOCXD=.TRUE., & - & TAUOCYD=.TRUE., TAUOC=.TRUE., PHIOCD=.TRUE., PHIEPS=.TRUE., PHIAW=.TRUE.) - CALL MIJ%SYNC_DEVICE_RDWR() - CDTPRA = CDTPRO CALL INCDATE(CDTPRO, IDELPRO) @@ -319,23 +319,6 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & IF (IBOUNC == 1) CALL OUTBC (VARS_4D%FL1, BLK2GLO, IU19) !NEST - -!* 1.5 POINT OUTPUT (not usually used at ECMWF) -! ---------------------------------------- - IF ( NGOUT > 0 .AND. (CDTINTT == CDTPRO .OR. LRST) ) THEN -! OUTPUT POINT SPECTRA (not usually used at ECMWF) - CALL VARS_4D%F_FL1%SYNC_HOST_RDONLY() - CALL FF_NOW%SYNC_HOST_RDONLY() - - CALL OUTWPSP (VARS_4D%FL1, FF_NOW) - - CALL VARS_4D%F_FL1%SYNC_DEVICE_RDWR() - CALL FF_NOW%SYNC_DEVICE_RDWR(AIRD=.TRUE., WDWAVE=.TRUE., CICOVER=.TRUE., WSWAVE=.TRUE., & - & WSTAR=.TRUE., UFRIC=.TRUE., TAUW=.TRUE., TAUWDIR=.TRUE., Z0M=.TRUE., Z0B=.TRUE., & - & CHRNCK=.TRUE., CITHICK=.TRUE., USTRA=.TRUE., VSTRA=.TRUE.) - ENDIF - - ! 1.6 COMPUTE OUTPUT PARAMETERS FIELDS AND PRINT OUT NORMS ! ---------------------------------------------------- IF ( (CDTINTT == CDTPRO .OR. LRST) .AND. NIPRMOUT > 0 ) THEN @@ -351,15 +334,16 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & #endif ENDIF - CALL WVPRPT%SYNC_HOST_RDWR() - CALL WVENVI%SYNC_HOST_RDWR() - CALL FF_NOW%SYNC_HOST_RDWR() - CALL FF_NEXT%SYNC_HOST_RDWR() - CALL WAM2NEMO%SYNC_HOST_RDWR() - CALL INTFLDS%SYNC_HOST_RDWR() - CALL VARS_4D%SYNC_HOST_RDWR() - CALL MIJ%SYNC_HOST_RDWR() - CALL BLK2GLO%SYNC_HOST_RDWR() +!* 1.5 POINT OUTPUT (not usually used at ECMWF) +! ---------------------------------------- + IF ( NGOUT > 0 .AND. (CDTINTT == CDTPRO .OR. LRST) ) THEN +! OUTPUT POINT SPECTRA (not usually used at ECMWF) + CALL VARS_4D%F_FL1%SYNC_HOST_RDONLY() + CALL FF_NOW%SYNC_HOST_RDONLY() + + CALL OUTWPSP (VARS_4D%FL1, FF_NOW) + ENDIF + !* 1.7 ONE PROPAGATION TIMESTEP DONE ! ----------------------------- @@ -408,6 +392,9 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & MARSTYPE='an' ENDIF + CALL VARS_4D%F_FL1%SYNC_HOST_RDONLY() + CALL FF_NOW%SYNC_HOST_RDONLY() + CALL OUTSPEC(VARS_4D%FL1, FF_NOW) LLFLUSH = .TRUE. @@ -421,6 +408,9 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & ! 1.8.2 SAVE RESTART FILES IN PURE BINARY FORM (in needed) ! -------------------------------------- IF ( .NOT.LGRIBOUT .OR. LDWRRE ) THEN + CALL FL1%SYNC_HOST_RDONLY() + CALL FF_NOW%SYNC_HOST_RDONLY() + CALL WVENVI%SYNC_HOST_RDONLY() CALL SAVSTRESS(WVENVI, FF_NOW, NBLKS, NBLKE, CDTPRO, CDATEF) WRITE(IU06,*) ' ' @@ -511,6 +501,13 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & ENDIF + CALL FL1%SYNC_DEVICE_RDWR() + CALL FF_NOW%SYNC_DEVICE_RDWR(AIRD=.TRUE., WDWAVE=.TRUE., CICOVER=.TRUE., WSWAVE=.TRUE., & + & WSTAR=.TRUE., UFRIC=.TRUE., TAUW=.TRUE., TAUWDIR=.TRUE., Z0M=.TRUE., Z0B=.TRUE., & + & CHRNCK=.TRUE., CITHICK=.TRUE., USTRA=.TRUE., VSTRA=.TRUE.) + CALL WVENVI%SYNC_DEVICE_RDWR(DEPTH=.TRUE., DELLAM1=.TRUE., COSPHM1=.TRUE., UCUR=.TRUE., VCUR=.TRUE., & + & EMAXDPT=.TRUE., IOBND=.TRUE., IODP=.TRUE.) + !* 1.10 FLUSH FDB IF IT HAS BEEN USED AND IT IS NOT AN ANALYSIS (it will be done in *wamassi*) ! ------------------------------------------------------- @@ -559,6 +556,9 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & NEMOWSTEP=NEMOWSTEP+1 IF (MOD(NEMOWSTEP,NEMOFRCO) == 0) THEN + + CALL WAM2NEMO%SYNC_HOST_RDONLY() + CALL UPDNEMOFIELDS CALL UPDNEMOSTRESS @@ -570,6 +570,10 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & ENDDO #endif NEMOCSTEP = NEMOCSTEP + NEMONSTEP + + CALL WAM2NEMO%SYNC_DEVICE_RDWR(NEMOUSTOKES=.TRUE., NEMOVSTOKES=.TRUE., NEMOSTRN=.TRUE., & + & NPHIEPS=.TRUE., NTAUOC=.TRUE., NSWH=.TRUE., NMWP=.TRUE., NEMOTAUX=.TRUE., & + & NEMOTAUY=.TRUE., NEMOWSWAVE=.TRUE., NEMOPHIF=.TRUE.) ENDIF ENDIF @@ -578,6 +582,16 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & !* BRANCHING BACK TO 1.0 FOR NEXT PROPAGATION STEP. ENDDO ADVECTION + CALL WVPRPT%SYNC_HOST_RDWR() + CALL WVENVI%SYNC_HOST_RDWR() + CALL FF_NOW%SYNC_HOST_RDWR() + CALL FF_NEXT%SYNC_HOST_RDWR() + CALL WAM2NEMO%SYNC_HOST_RDWR() + CALL INTFLDS%SYNC_HOST_RDWR() + CALL VARS_4D%SYNC_HOST_RDWR() + CALL MIJ%SYNC_HOST_RDWR() + CALL BLK2GLO%SYNC_HOST_RDWR() + IF(MIJ%LALLOC) CALL MIJ%DEALLOC() IF (LHOOK) CALL DR_HOOK('WAMODEL',1,ZHOOK_HANDLE) From 1823420f1d8b91898c4330295efbc8702305057d Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Wed, 22 May 2024 16:09:09 +0000 Subject: [PATCH 04/17] Use asynchronous data transfers --- src/ecwam/drvtype_mod.fypp | 172 ++++++++++++++++++++++++++++++-- src/ecwam/ecwam_loki_gpu.config | 2 +- src/ecwam/initdpthflds.F90 | 2 +- src/ecwam/wamintgr_loki_gpu.F90 | 32 ++++++ src/ecwam/wamodel.F90 | 83 ++++++++++----- 5 files changed, 260 insertions(+), 31 deletions(-) diff --git a/src/ecwam/drvtype_mod.fypp b/src/ecwam/drvtype_mod.fypp index b61c45ef..9928fd45 100644 --- a/src/ecwam/drvtype_mod.fypp +++ b/src/ecwam/drvtype_mod.fypp @@ -53,6 +53,10 @@ MODULE ${obj.upper()}$_TYPE_MOD PROCEDURE :: SYNC_DEVICE_RDONLY => ${obj.upper()}$_SYNC_DEVICE_RDONLY PROCEDURE :: SYNC_HOST_RDWR => ${obj.upper()}$_SYNC_HOST_RDWR PROCEDURE :: SYNC_HOST_RDONLY => ${obj.upper()}$_SYNC_HOST_RDONLY + PROCEDURE :: GET_DEVICE_DATA_RDWR => ${obj.upper()}$_GET_DEVICE_DATA_RDWR + PROCEDURE :: GET_DEVICE_DATA_RDONLY => ${obj.upper()}$_GET_DEVICE_DATA_RDONLY + PROCEDURE :: GET_HOST_DATA_RDWR => ${obj.upper()}$_GET_HOST_DATA_RDWR + PROCEDURE :: GET_HOST_DATA_RDONLY => ${obj.upper()}$_GET_HOST_DATA_RDONLY #:endif END TYPE ${obj.upper()}$ @@ -100,7 +104,7 @@ MODULE ${obj.upper()}$_TYPE_MOD END SUBROUTINE ${obj.upper()}$_DEALLOC #:if defined('WAM_GPU') - SUBROUTINE ${obj.upper()}$_SYNC_DEVICE_RDWR(SELF, ${', '.join(var.upper() for _, vars in zip(_def['types'], _def['vars']) for var in vars)}$) + SUBROUTINE ${obj.upper()}$_GET_DEVICE_DATA_RDWR(SELF, ${', '.join(var.upper() for _, vars in zip(_def['types'], _def['vars']) for var in vars)}$) CLASS(${obj.upper()}$) :: SELF #:for _, vars in zip(_def['types'], _def['vars']) #:for var in vars @@ -138,9 +142,9 @@ MODULE ${obj.upper()}$_TYPE_MOD #:endfor ENDIF - END SUBROUTINE ${obj.upper()}$_SYNC_DEVICE_RDWR + END SUBROUTINE ${obj.upper()}$_GET_DEVICE_DATA_RDWR - SUBROUTINE ${obj.upper()}$_SYNC_DEVICE_RDONLY(SELF, ${', '.join(var.upper() for _, vars in zip(_def['types'], _def['vars']) for var in vars)}$) + SUBROUTINE ${obj.upper()}$_GET_DEVICE_DATA_RDONLY(SELF, ${', '.join(var.upper() for _, vars in zip(_def['types'], _def['vars']) for var in vars)}$) CLASS(${obj.upper()}$) :: SELF #:for _, vars in zip(_def['types'], _def['vars']) #:for var in vars @@ -178,9 +182,87 @@ MODULE ${obj.upper()}$_TYPE_MOD #:endfor ENDIF + END SUBROUTINE ${obj.upper()}$_GET_DEVICE_DATA_RDONLY + + SUBROUTINE ${obj.upper()}$_SYNC_DEVICE_RDWR(SELF, ${', '.join(var.upper() for _, vars in zip(_def['types'], _def['vars']) for var in vars)}$, QUEUE) + CLASS(${obj.upper()}$) :: SELF + #:for _, vars in zip(_def['types'], _def['vars']) + #:for var in vars + #:set flag = ' LOGICAL, INTENT(IN), OPTIONAL :: ' + var.upper() + $:flag + #:set flag = ' LOGICAL :: ' + f'L_{var.upper()} = .FALSE.' + $:flag + #:endfor + #:endfor + INTEGER(KIND=JWIM), INTENT(IN), OPTIONAL :: QUEUE + + #:for _, vars in zip(_def['types'], _def['vars']) + #:for var in vars + IF(PRESENT(${var.upper()}$)) L_${var.upper()}$ = ${var.upper()}$ + #:endfor + #:endfor + + + IF(${'.OR.'.join(f'L_{var.upper()}' for _, vars in zip(_def['types'], _def['vars']) for var in vars)}$)THEN + !... copy only selected members + #:for _, vars in zip(_def['types'], _def['vars']) + #:for var in vars + IF(${f'L_{var.upper()}'}$)THEN + CALL SELF%F_${var.upper()}$%SYNC_DEVICE_RDWR(QUEUE=QUEUE) + ENDIF + #:endfor + #:endfor + ELSE + !... copy entire struct + #:for _, vars in zip(_def['types'], _def['vars']) + #:for var in vars + CALL SELF%F_${var.upper()}$%SYNC_DEVICE_RDWR(QUEUE=QUEUE) + #:endfor + #:endfor + ENDIF + + END SUBROUTINE ${obj.upper()}$_SYNC_DEVICE_RDWR + + SUBROUTINE ${obj.upper()}$_SYNC_DEVICE_RDONLY(SELF, ${', '.join(var.upper() for _, vars in zip(_def['types'], _def['vars']) for var in vars)}$, QUEUE) + CLASS(${obj.upper()}$) :: SELF + #:for _, vars in zip(_def['types'], _def['vars']) + #:for var in vars + #:set flag = ' LOGICAL, INTENT(IN), OPTIONAL :: ' + var.upper() + $:flag + #:set flag = ' LOGICAL :: ' + f'L_{var.upper()} = .FALSE.' + $:flag + #:endfor + #:endfor + INTEGER(KIND=JWIM), INTENT(IN), OPTIONAL :: QUEUE + + #:for _, vars in zip(_def['types'], _def['vars']) + #:for var in vars + IF(PRESENT(${var.upper()}$)) L_${var.upper()}$ = ${var.upper()}$ + #:endfor + #:endfor + + + IF(${'.OR.'.join(f'L_{var.upper()}' for _, vars in zip(_def['types'], _def['vars']) for var in vars)}$)THEN + !... copy only selected members + #:for _, vars in zip(_def['types'], _def['vars']) + #:for var in vars + IF(${f'L_{var.upper()}'}$)THEN + CALL SELF%F_${var.upper()}$%SYNC_DEVICE_RDONLY(QUEUE=QUEUE) + ENDIF + #:endfor + #:endfor + ELSE + !... copy entire struct + #:for _, vars in zip(_def['types'], _def['vars']) + #:for var in vars + CALL SELF%F_${var.upper()}$%SYNC_DEVICE_RDONLY(QUEUE=QUEUE) + #:endfor + #:endfor + ENDIF + END SUBROUTINE ${obj.upper()}$_SYNC_DEVICE_RDONLY - SUBROUTINE ${obj.upper()}$_SYNC_HOST_RDONLY(SELF, ${', '.join(var.upper() for _, vars in zip(_def['types'], _def['vars']) for var in vars)}$) + SUBROUTINE ${obj.upper()}$_GET_HOST_DATA_RDONLY(SELF, ${', '.join(var.upper() for _, vars in zip(_def['types'], _def['vars']) for var in vars)}$) CLASS(${obj.upper()}$) :: SELF #:for _, vars in zip(_def['types'], _def['vars']) #:for var in vars @@ -218,9 +300,9 @@ MODULE ${obj.upper()}$_TYPE_MOD #:endfor ENDIF - END SUBROUTINE ${obj.upper()}$_SYNC_HOST_RDONLY + END SUBROUTINE ${obj.upper()}$_GET_HOST_DATA_RDONLY - SUBROUTINE ${obj.upper()}$_SYNC_HOST_RDWR(SELF, ${', '.join(var.upper() for _, vars in zip(_def['types'], _def['vars']) for var in vars)}$) + SUBROUTINE ${obj.upper()}$_GET_HOST_DATA_RDWR(SELF, ${', '.join(var.upper() for _, vars in zip(_def['types'], _def['vars']) for var in vars)}$) CLASS(${obj.upper()}$) :: SELF #:for _, vars in zip(_def['types'], _def['vars']) #:for var in vars @@ -257,6 +339,84 @@ MODULE ${obj.upper()}$_TYPE_MOD #:endfor ENDIF + END SUBROUTINE ${obj.upper()}$_GET_HOST_DATA_RDWR + + SUBROUTINE ${obj.upper()}$_SYNC_HOST_RDONLY(SELF, ${', '.join(var.upper() for _, vars in zip(_def['types'], _def['vars']) for var in vars)}$, QUEUE) + CLASS(${obj.upper()}$) :: SELF + #:for _, vars in zip(_def['types'], _def['vars']) + #:for var in vars + #:set flag = ' LOGICAL, INTENT(IN), OPTIONAL :: ' + var.upper() + $:flag + #:set flag = ' LOGICAL :: ' + f'L_{var.upper()} = .FALSE.' + $:flag + #:endfor + #:endfor + INTEGER(KIND=JWIM), INTENT(IN), OPTIONAL :: QUEUE + + #:for _, vars in zip(_def['types'], _def['vars']) + #:for var in vars + IF(PRESENT(${var.upper()}$)) L_${var.upper()}$ = ${var.upper()}$ + #:endfor + #:endfor + + + IF(${'.OR.'.join(f'L_{var.upper()}' for _, vars in zip(_def['types'], _def['vars']) for var in vars)}$)THEN + !... copy only selected members + #:for _, vars in zip(_def['types'], _def['vars']) + #:for var in vars + IF(${f'L_{var.upper()}'}$)THEN + CALL SELF%F_${var.upper()}$%SYNC_HOST_RDONLY(QUEUE=QUEUE) + ENDIF + #:endfor + #:endfor + ELSE + !... copy entire struct + #:for _, vars in zip(_def['types'], _def['vars']) + #:for var in vars + CALL SELF%F_${var.upper()}$%SYNC_HOST_RDONLY(QUEUE=QUEUE) + #:endfor + #:endfor + ENDIF + + END SUBROUTINE ${obj.upper()}$_SYNC_HOST_RDONLY + + SUBROUTINE ${obj.upper()}$_SYNC_HOST_RDWR(SELF, ${', '.join(var.upper() for _, vars in zip(_def['types'], _def['vars']) for var in vars)}$, QUEUE) + CLASS(${obj.upper()}$) :: SELF + #:for _, vars in zip(_def['types'], _def['vars']) + #:for var in vars + #:set flag = ' LOGICAL, INTENT(IN), OPTIONAL :: ' + var.upper() + $:flag + #:set flag = ' LOGICAL :: ' + f'L_{var.upper()} = .FALSE.' + $:flag + #:endfor + #:endfor + INTEGER(KIND=JWIM), INTENT(IN), OPTIONAL :: QUEUE + + #:for _, vars in zip(_def['types'], _def['vars']) + #:for var in vars + IF(PRESENT(${var.upper()}$)) L_${var.upper()}$ = ${var.upper()}$ + #:endfor + #:endfor + + + IF(${'.OR.'.join(f'L_{var.upper()}' for _, vars in zip(_def['types'], _def['vars']) for var in vars)}$)THEN + !... copy only selected members + #:for _, vars in zip(_def['types'], _def['vars']) + #:for var in vars + IF(${f'L_{var.upper()}'}$)THEN + CALL SELF%F_${var.upper()}$%SYNC_HOST_RDWR(QUEUE=QUEUE) + ENDIF + #:endfor + #:endfor + ELSE + !... copy entire struct + #:for _, vars in zip(_def['types'], _def['vars']) + #:for var in vars + CALL SELF%F_${var.upper()}$%SYNC_HOST_RDWR(QUEUE=QUEUE) + #:endfor + #:endfor + ENDIF + END SUBROUTINE ${obj.upper()}$_SYNC_HOST_RDWR #:endif diff --git a/src/ecwam/ecwam_loki_gpu.config b/src/ecwam/ecwam_loki_gpu.config index 1666acc3..57468d5c 100644 --- a/src/ecwam/ecwam_loki_gpu.config +++ b/src/ecwam/ecwam_loki_gpu.config @@ -15,7 +15,7 @@ disable = [ 'ieee_set_halting_mode', 'ieee_get_halting_mode', # intrinsic subroutines 'mfeb_length', 'cdm', # internal functions 'outwspec_io_serv_handler', 'outint_io_serv_handler', 'ifstowam_handler', # procedure pointers - '*%sync_host*', '*%sync_device*', + '*%sync_host*', '*%sync_device*', 'wait_for_async_queue', 'field_async_module', '*%get_device_data*', '*%get_host_data*', 'df', 'f' # statement functions ] diff --git a/src/ecwam/initdpthflds.F90 b/src/ecwam/initdpthflds.F90 index a2fbe92a..b7dea528 100644 --- a/src/ecwam/initdpthflds.F90 +++ b/src/ecwam/initdpthflds.F90 @@ -88,7 +88,7 @@ SUBROUTINE INITDPTHFLDS(WVENVI, WVPRPT, WVPRPT_LAND) WVPRPT_LAND%CIWA(:) = 1.0_JWRB #ifdef WAM_GPU - CALL WVPRPT_LAND%SYNC_DEVICE_RDONLY() + CALL WVPRPT_LAND%GET_DEVICE_DATA_RDONLY() #endif IF (LHOOK) CALL DR_HOOK('INITDPTHFLDS',1,ZHOOK_HANDLE) diff --git a/src/ecwam/wamintgr_loki_gpu.F90 b/src/ecwam/wamintgr_loki_gpu.F90 index df094375..0dd5620c 100644 --- a/src/ecwam/wamintgr_loki_gpu.F90 +++ b/src/ecwam/wamintgr_loki_gpu.F90 @@ -42,6 +42,7 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & USE YOWSTAT , ONLY : CDTPRO, IDELPRO, IDELT, IDELWI, LLSOURCE, TIME_PROPAG, TIME_PHYS, & & LUPDATE_GPU_GLOBALS USE YOWWIND , ONLY : CDAWIFL, CDATEWO, CDATEFL +USE FIELD_ASYNC_MODULE, ONLY : WAIT_FOR_ASYNC_QUEUE USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK @@ -95,6 +96,14 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & !$loki update_device ENDIF +CALL WAIT_FOR_ASYNC_QUEUE(QUEUE=0) +CALL VARS_4D%F_FL1%GET_DEVICE_DATA_RDWR(VARS_4D%FL1) +!$acc enter data attach(VARS_4D%FL1) +CALL WVPRPT%GET_DEVICE_DATA_RDWR() +CALL WVENVI%GET_DEVICE_DATA_RDWR(DEPTH=.TRUE., DELLAM1=.TRUE., COSPHM1=.TRUE., UCUR=.TRUE., VCUR=.TRUE., & +& EMAXDPT=.TRUE., IOBND=.TRUE., IODP=.TRUE.) +CALL BLK2GLO%GET_DEVICE_DATA_RDONLY() + !$acc data present(VARS_4D, WVPRPT, WVENVI, BLK2GLO) IF (CDATE == CDTPRA) THEN @@ -108,6 +117,13 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & !* RETRIEVING NEW FORCING FIELDS IF NEEDED. ! ---------------------------------------- +CALL WAIT_FOR_ASYNC_QUEUE(QUEUE=1) +CALL FF_NOW%GET_DEVICE_DATA_RDWR(AIRD=.TRUE., WDWAVE=.TRUE., CICOVER=.TRUE., WSWAVE=.TRUE., & +& WSTAR=.TRUE., UFRIC=.TRUE., TAUW=.TRUE., TAUWDIR=.TRUE., Z0M=.TRUE., Z0B=.TRUE., & +& CHRNCK=.TRUE., CITHICK=.TRUE., USTRA=.TRUE., VSTRA=.TRUE.) +CALL FF_NEXT%GET_DEVICE_DATA_RDONLY(AIRD=.TRUE., WDWAVE=.TRUE., CICOVER=.TRUE., WSWAVE=.TRUE., & +& WSTAR=.TRUE., UFRIC=.TRUE., TAUW=.TRUE., TAUWDIR=.TRUE., Z0M=.TRUE., Z0B=.TRUE., & +& CHRNCK=.TRUE., CITHICK=.TRUE., USTRA=.TRUE., VSTRA=.TRUE.) CALL NEWWIND(CDTIMP, CDATEWH, LLNEWFILE, & & WVPRPT, FF_NOW, FF_NEXT) @@ -119,6 +135,17 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & CALL GSTATS(1431,0) IF (LLSOURCE) THEN + CALL WAIT_FOR_ASYNC_QUEUE(QUEUE=2) + CALL WAM2NEMO%GET_DEVICE_DATA_RDWR(NEMOUSTOKES=.TRUE., NEMOVSTOKES=.TRUE., NEMOSTRN=.TRUE., & + & NPHIEPS=.TRUE., NTAUOC=.TRUE., NSWH=.TRUE., NMWP=.TRUE., NEMOTAUX=.TRUE., & + & NEMOTAUY=.TRUE., NEMOWSWAVE=.TRUE., NEMOPHIF=.TRUE.) + CALL INTFLDS%GET_DEVICE_DATA_RDWR(WSEMEAN=.TRUE., WSFMEAN=.TRUE., USTOKES=.TRUE., & + & VSTOKES=.TRUE., STRNMS=.TRUE., TAUXD=.TRUE., TAUYD=.TRUE., TAUOCXD=.TRUE., & + & TAUOCYD=.TRUE., TAUOC=.TRUE., PHIOCD=.TRUE., PHIEPS=.TRUE., PHIAW=.TRUE.) + CALL VARS_4D%F_XLLWS%GET_DEVICE_DATA_RDWR(VARS_4D%XLLWS) + !$acc enter data attach(VARS_4D%XLLWS) + CALL MIJ%GET_DEVICE_DATA_RDWR() + TIME0=-WAM_USER_CLOCK() !$acc parallel loop gang vector_length(NPROMA_WAM) default(present) copyin(NPROMA_WAM) @@ -149,6 +176,11 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & TIME_PHYS = TIME_PHYS + (TIME0+WAM_USER_CLOCK())*1.E-06 + CALL VARS_4D%F_FL1%SYNC_HOST_RDONLY(QUEUE=3) + CALL FF_NOW%SYNC_HOST_RDONLY(QUEUE=3) + CALL WVENVI%SYNC_HOST_RDONLY(QUEUE=4) + CALL WAM2NEMO%SYNC_HOST_RDONLY(QUEUE=5) + IF (LWNEMOCOU) NEMONTAU = NEMONTAU + 1 ELSE diff --git a/src/ecwam/wamodel.F90 b/src/ecwam/wamodel.F90 index ed9e6cc5..97decb17 100644 --- a/src/ecwam/wamodel.F90 +++ b/src/ecwam/wamodel.F90 @@ -87,6 +87,7 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & USE WAM_MULTIO_MOD, ONLY : WAM_MULTIO_FLUSH USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK USE YOWABORT , ONLY : WAM_ABORT + USE FIELD_ASYNC_MODULE, ONLY : WAIT_FOR_ASYNC_QUEUE ! ---------------------------------------------------------------------- @@ -201,24 +202,27 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & !* 1. ADVECTION/PHYSICS TIME LOOP. ! ---------------------------- - CALL VARS_4D%SYNC_DEVICE_RDWR() - CALL BLK2GLO%SYNC_DEVICE_RDONLY() +#ifdef WAM_GPU + CALL VARS_4D%F_FL1%SYNC_DEVICE_RDWR(QUEUE=0) + CALL BLK2GLO%SYNC_DEVICE_RDONLY(QUEUE=0) + CALL WVPRPT%SYNC_DEVICE_RDWR(QUEUE=0) + CALL WVENVI%SYNC_DEVICE_RDWR(DEPTH=.TRUE., DELLAM1=.TRUE., COSPHM1=.TRUE., UCUR=.TRUE., VCUR=.TRUE., & + & EMAXDPT=.TRUE., IOBND=.TRUE., IODP=.TRUE., QUEUE=0) CALL FF_NOW%SYNC_DEVICE_RDWR(AIRD=.TRUE., WDWAVE=.TRUE., CICOVER=.TRUE., WSWAVE=.TRUE., & & WSTAR=.TRUE., UFRIC=.TRUE., TAUW=.TRUE., TAUWDIR=.TRUE., Z0M=.TRUE., Z0B=.TRUE., & - & CHRNCK=.TRUE., CITHICK=.TRUE., USTRA=.TRUE., VSTRA=.TRUE.) + & CHRNCK=.TRUE., CITHICK=.TRUE., USTRA=.TRUE., VSTRA=.TRUE., QUEUE=1) CALL FF_NEXT%SYNC_DEVICE_RDONLY(AIRD=.TRUE., WDWAVE=.TRUE., CICOVER=.TRUE., WSWAVE=.TRUE., & & WSTAR=.TRUE., UFRIC=.TRUE., TAUW=.TRUE., TAUWDIR=.TRUE., Z0M=.TRUE., Z0B=.TRUE., & - & CHRNCK=.TRUE., CITHICK=.TRUE., USTRA=.TRUE., VSTRA=.TRUE.) - CALL WVPRPT%SYNC_DEVICE_RDWR() - CALL WVENVI%SYNC_DEVICE_RDWR(DEPTH=.TRUE., DELLAM1=.TRUE., COSPHM1=.TRUE., UCUR=.TRUE., VCUR=.TRUE., & - & EMAXDPT=.TRUE., IOBND=.TRUE., IODP=.TRUE.) + & CHRNCK=.TRUE., CITHICK=.TRUE., USTRA=.TRUE., VSTRA=.TRUE., QUEUE=1) CALL WAM2NEMO%SYNC_DEVICE_RDWR(NEMOUSTOKES=.TRUE., NEMOVSTOKES=.TRUE., NEMOSTRN=.TRUE., & & NPHIEPS=.TRUE., NTAUOC=.TRUE., NSWH=.TRUE., NMWP=.TRUE., NEMOTAUX=.TRUE., & - & NEMOTAUY=.TRUE., NEMOWSWAVE=.TRUE., NEMOPHIF=.TRUE.) + & NEMOTAUY=.TRUE., NEMOWSWAVE=.TRUE., NEMOPHIF=.TRUE., QUEUE=2) CALL INTFLDS%SYNC_DEVICE_RDWR(WSEMEAN=.TRUE., WSFMEAN=.TRUE., USTOKES=.TRUE., & & VSTOKES=.TRUE., STRNMS=.TRUE., TAUXD=.TRUE., TAUYD=.TRUE., TAUOCXD=.TRUE., & - & TAUOCYD=.TRUE., TAUOC=.TRUE., PHIOCD=.TRUE., PHIEPS=.TRUE., PHIAW=.TRUE.) - CALL MIJ%SYNC_DEVICE_RDWR() + & TAUOCYD=.TRUE., TAUOC=.TRUE., PHIOCD=.TRUE., PHIEPS=.TRUE., PHIAW=.TRUE., QUEUE=2) + CALL VARS_4D%F_XLLWS%SYNC_DEVICE_RDWR(QUEUE=2) + CALL MIJ%SYNC_DEVICE_RDWR(QUEUE=2) +#endif ADVECTION : DO KADV = 1,NADV @@ -332,14 +336,21 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & & WVPRPT, WVENVI, FF_NOW, INTFLDS, NEMO2WAM, & & BOUT) #endif + ENDIF !* 1.5 POINT OUTPUT (not usually used at ECMWF) ! ---------------------------------------- IF ( NGOUT > 0 .AND. (CDTINTT == CDTPRO .OR. LRST) ) THEN ! OUTPUT POINT SPECTRA (not usually used at ECMWF) - CALL VARS_4D%F_FL1%SYNC_HOST_RDONLY() - CALL FF_NOW%SYNC_HOST_RDONLY() +#ifdef WAM_GPU + CALL WAIT_FOR_ASYNC_QUEUE(QUEUE=3) + CALL VARS_4D%F_FL1%GET_HOST_DATA_RDONLY(VARS_4D%FL1) + !$acc exit data detach(VARS_4D%FL1) + CALL FF_NOW%GET_HOST_DATA_RDONLY(AIRD=.TRUE., WDWAVE=.TRUE., CICOVER=.TRUE., WSWAVE=.TRUE., & + & WSTAR=.TRUE., UFRIC=.TRUE., TAUW=.TRUE., TAUWDIR=.TRUE., Z0M=.TRUE., Z0B=.TRUE., & + & CHRNCK=.TRUE., CITHICK=.TRUE., USTRA=.TRUE., VSTRA=.TRUE.) +#endif CALL OUTWPSP (VARS_4D%FL1, FF_NOW) ENDIF @@ -392,8 +403,14 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & MARSTYPE='an' ENDIF - CALL VARS_4D%F_FL1%SYNC_HOST_RDONLY() - CALL FF_NOW%SYNC_HOST_RDONLY() +#ifdef WAM_GPU + CALL WAIT_FOR_ASYNC_QUEUE(QUEUE=3) + CALL VARS_4D%F_FL1%GET_HOST_DATA_RDONLY(VARS_4D%FL1) + !$acc exit data detach(VARS_4D%FL1) + CALL FF_NOW%GET_HOST_DATA_RDONLY(AIRD=.TRUE., WDWAVE=.TRUE., CICOVER=.TRUE., WSWAVE=.TRUE., & + & WSTAR=.TRUE., UFRIC=.TRUE., TAUW=.TRUE., TAUWDIR=.TRUE., Z0M=.TRUE., Z0B=.TRUE., & + & CHRNCK=.TRUE., CITHICK=.TRUE., USTRA=.TRUE., VSTRA=.TRUE.) +#endif CALL OUTSPEC(VARS_4D%FL1, FF_NOW) LLFLUSH = .TRUE. @@ -408,9 +425,18 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & ! 1.8.2 SAVE RESTART FILES IN PURE BINARY FORM (in needed) ! -------------------------------------- IF ( .NOT.LGRIBOUT .OR. LDWRRE ) THEN - CALL FL1%SYNC_HOST_RDONLY() - CALL FF_NOW%SYNC_HOST_RDONLY() - CALL WVENVI%SYNC_HOST_RDONLY() +#ifdef WAM_GPU + CALL WAIT_FOR_ASYNC_QUEUE(QUEUE=3) + CALL WAIT_FOR_ASYNC_QUEUE(QUEUE=4) + + CALL VARS_4D%F_FL1%GET_HOST_DATA_RDONLY(VARS_4D%FL1) + !$acc exit data detach(VARS_4D%FL1) + CALL FF_NOW%GET_HOST_DATA_RDONLY(AIRD=.TRUE., WDWAVE=.TRUE., CICOVER=.TRUE., WSWAVE=.TRUE., & + & WSTAR=.TRUE., UFRIC=.TRUE., TAUW=.TRUE., TAUWDIR=.TRUE., Z0M=.TRUE., Z0B=.TRUE., & + & CHRNCK=.TRUE., CITHICK=.TRUE., USTRA=.TRUE., VSTRA=.TRUE.) + CALL WVENVI%GET_HOST_DATA_RDONLY(DEPTH=.TRUE., DELLAM1=.TRUE., COSPHM1=.TRUE., UCUR=.TRUE., VCUR=.TRUE., & + & EMAXDPT=.TRUE., IOBND=.TRUE., IODP=.TRUE.) +#endif CALL SAVSTRESS(WVENVI, FF_NOW, NBLKS, NBLKE, CDTPRO, CDATEF) WRITE(IU06,*) ' ' @@ -501,12 +527,14 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & ENDIF - CALL FL1%SYNC_DEVICE_RDWR() +#ifdef WAM_GPU + CALL VARS_4D%F_FL1%SYNC_DEVICE_RDWR(QUEUE=0) + CALL WVENVI%SYNC_DEVICE_RDWR(DEPTH=.TRUE., DELLAM1=.TRUE., COSPHM1=.TRUE., UCUR=.TRUE., VCUR=.TRUE., & + & EMAXDPT=.TRUE., IOBND=.TRUE., IODP=.TRUE., QUEUE=0) CALL FF_NOW%SYNC_DEVICE_RDWR(AIRD=.TRUE., WDWAVE=.TRUE., CICOVER=.TRUE., WSWAVE=.TRUE., & & WSTAR=.TRUE., UFRIC=.TRUE., TAUW=.TRUE., TAUWDIR=.TRUE., Z0M=.TRUE., Z0B=.TRUE., & - & CHRNCK=.TRUE., CITHICK=.TRUE., USTRA=.TRUE., VSTRA=.TRUE.) - CALL WVENVI%SYNC_DEVICE_RDWR(DEPTH=.TRUE., DELLAM1=.TRUE., COSPHM1=.TRUE., UCUR=.TRUE., VCUR=.TRUE., & - & EMAXDPT=.TRUE., IOBND=.TRUE., IODP=.TRUE.) + & CHRNCK=.TRUE., CITHICK=.TRUE., USTRA=.TRUE., VSTRA=.TRUE., QUEUE=1) +#endif !* 1.10 FLUSH FDB IF IT HAS BEEN USED AND IT IS NOT AN ANALYSIS (it will be done in *wamassi*) ! ------------------------------------------------------- @@ -557,7 +585,12 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & IF (MOD(NEMOWSTEP,NEMOFRCO) == 0) THEN - CALL WAM2NEMO%SYNC_HOST_RDONLY() +#ifdef WAM_GPU + CALL WAIT_FOR_ASYNC_QUEUE(QUEUE=5) + CALL WAM2NEMO%GET_HOST_DATA_RDONLY(NEMOUSTOKES=.TRUE., NEMOVSTOKES=.TRUE., NEMOSTRN=.TRUE., & + & NPHIEPS=.TRUE., NTAUOC=.TRUE., NSWH=.TRUE., NMWP=.TRUE., NEMOTAUX=.TRUE., & + & NEMOTAUY=.TRUE., NEMOWSWAVE=.TRUE., NEMOPHIF=.TRUE.) +#endif CALL UPDNEMOFIELDS CALL UPDNEMOSTRESS @@ -571,9 +604,11 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & #endif NEMOCSTEP = NEMOCSTEP + NEMONSTEP +#ifdef WAM_GPU CALL WAM2NEMO%SYNC_DEVICE_RDWR(NEMOUSTOKES=.TRUE., NEMOVSTOKES=.TRUE., NEMOSTRN=.TRUE., & & NPHIEPS=.TRUE., NTAUOC=.TRUE., NSWH=.TRUE., NMWP=.TRUE., NEMOTAUX=.TRUE., & - & NEMOTAUY=.TRUE., NEMOWSWAVE=.TRUE., NEMOPHIF=.TRUE.) + & NEMOTAUY=.TRUE., NEMOWSWAVE=.TRUE., NEMOPHIF=.TRUE., QUEUE=2) +#endif ENDIF ENDIF @@ -582,6 +617,7 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & !* BRANCHING BACK TO 1.0 FOR NEXT PROPAGATION STEP. ENDDO ADVECTION +#ifdef WAM_GPU CALL WVPRPT%SYNC_HOST_RDWR() CALL WVENVI%SYNC_HOST_RDWR() CALL FF_NOW%SYNC_HOST_RDWR() @@ -591,6 +627,7 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & CALL VARS_4D%SYNC_HOST_RDWR() CALL MIJ%SYNC_HOST_RDWR() CALL BLK2GLO%SYNC_HOST_RDWR() +#endif IF(MIJ%LALLOC) CALL MIJ%DEALLOC() From e5d589fc842107532c50918c8bd46e3f22448b25 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Thu, 23 May 2024 12:52:22 +0000 Subject: [PATCH 05/17] O320: add single precision validation hashes and lower nproma to 64 --- tests/etopo1_oper_an_fc_O320.yml | 35 +++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/tests/etopo1_oper_an_fc_O320.yml b/tests/etopo1_oper_an_fc_O320.yml index 2ee3c09e..658aae02 100644 --- a/tests/etopo1_oper_an_fc_O320.yml +++ b/tests/etopo1_oper_an_fc_O320.yml @@ -16,7 +16,7 @@ forecast.end: 2023-01-01 06:00:00 begin: ${analysis.begin} end: ${forecast.end} -nproma: 128 +nproma: 64 forcings: file: data/forcings/oper_an_12h_fc_2023010100_36h_O320.grib @@ -83,3 +83,36 @@ validation: maximum: 0.7470743687402659E+01 relative_tolerance: 1.e-14 hashes: [0x401DE20AA218C57F] + + single_precision: + + # initial analysis time + - name: swh + time: 2022-12-31 12:00:00 + average: 0.1334374189376831E+01 + relative_tolerance: 1.e-6 + hashes: ['0x3FF55998C0000000'] + + # initial forecast time + - name: swh + time: 2023-01-01 00:00:00 + average: 0.1522672772407532E+01 + relative_tolerance: 1.e-6 + hashes: ['0x3FF85CDE20000000'] + + # 6h into forcast + - name: swh + time: 2023-01-01 06:00:00 + average: 0.1602276444435120E+01 + relative_tolerance: 1.e-6 + hashes: ['0x3FF9A2ECA0000000'] + - name: swh + time: 2023-01-01 06:00:00 + minimum: 0.1733699440956116E-01 + relative_tolerance: 1.e-6 + hashes: ['0x3F91C0CA00000000'] + - name: swh + time: 2023-01-01 06:00:00 + maximum: 0.7470741748809814E+01 + relative_tolerance: 1.e-6 + hashes: ['0x401DE20A20000000'] \ No newline at end of file From 05c5332e4ca86d83353fe97fb3d0ce7ef7b97833 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Thu, 23 May 2024 12:57:40 +0000 Subject: [PATCH 06/17] Move WVPRPT_LAND data movement to WAMODEL --- src/ecwam/initdpthflds.F90 | 4 ---- src/ecwam/proenvhalo.F90 | 3 +++ src/ecwam/wamodel.F90 | 4 +++- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/ecwam/initdpthflds.F90 b/src/ecwam/initdpthflds.F90 index b7dea528..41eea9ab 100644 --- a/src/ecwam/initdpthflds.F90 +++ b/src/ecwam/initdpthflds.F90 @@ -87,10 +87,6 @@ SUBROUTINE INITDPTHFLDS(WVENVI, WVPRPT, WVPRPT_LAND) WVPRPT_LAND%CIWA(:) = 1.0_JWRB -#ifdef WAM_GPU - CALL WVPRPT_LAND%GET_DEVICE_DATA_RDONLY() -#endif - IF (LHOOK) CALL DR_HOOK('INITDPTHFLDS',1,ZHOOK_HANDLE) END SUBROUTINE INITDPTHFLDS diff --git a/src/ecwam/proenvhalo.F90 b/src/ecwam/proenvhalo.F90 index a5a6b070..939a1585 100644 --- a/src/ecwam/proenvhalo.F90 +++ b/src/ecwam/proenvhalo.F90 @@ -89,6 +89,9 @@ SUBROUTINE PROENVHALO (NINF, NSUP, & !$OMP END PARALLEL DO #endif /*_OPENACC*/ +#ifdef WAM_GPU + CALL WVPRPT_LAND%GET_DEVICE_DATA_RDONLY() +#endif CALL MPEXCHNG(BUFFER_EXT, 3*NFRE_RED+5, 1, 1) !$acc kernels present(WVPRPT_LAND) BUFFER_EXT(NSUP+1,1:NFRE_RED) = WVPRPT_LAND%WAVNUM(1:NFRE_RED) diff --git a/src/ecwam/wamodel.F90 b/src/ecwam/wamodel.F90 index 97decb17..f1af4e21 100644 --- a/src/ecwam/wamodel.F90 +++ b/src/ecwam/wamodel.F90 @@ -63,7 +63,7 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & & LRSTST0 ,LWAMANOUT USE YOWCURR , ONLY : CDTCUR USE YOWFPBO , ONLY : IBOUNF - USE YOWFRED , ONLY : FR ,TH + USE YOWFRED , ONLY : FR ,TH, WVPRPT_LAND USE YOWGRID , ONLY : NPROMA_WAM, NCHNK USE YOWICE , ONLY : LICERUN ,LMASKICE USE YOWMESPAS, ONLY : LFDBIOOUT,LGRIBOUT ,LNOCDIN ,LWAVEWIND @@ -203,6 +203,7 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & ! ---------------------------- #ifdef WAM_GPU + CALL WVPRPT_LAND%SYNC_DEVICE_RDONLY(QUEUE=0) CALL VARS_4D%F_FL1%SYNC_DEVICE_RDWR(QUEUE=0) CALL BLK2GLO%SYNC_DEVICE_RDONLY(QUEUE=0) CALL WVPRPT%SYNC_DEVICE_RDWR(QUEUE=0) @@ -618,6 +619,7 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & ENDDO ADVECTION #ifdef WAM_GPU + CALL WVPRPT_LAND%SYNC_HOST_RDWR() CALL WVPRPT%SYNC_HOST_RDWR() CALL WVENVI%SYNC_HOST_RDWR() CALL FF_NOW%SYNC_HOST_RDWR() From 1fd99ee9678e16a147bfd149fbfd24a8310c9c4e Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Thu, 23 May 2024 13:14:07 +0000 Subject: [PATCH 07/17] Time advection loop --- src/ecwam/runwam.F90 | 10 +++++----- src/ecwam/wamodel.F90 | 7 ++++++- src/ecwam/yowstat.F90 | 2 +- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/ecwam/runwam.F90 b/src/ecwam/runwam.F90 index 1d6f0502..61eedd53 100644 --- a/src/ecwam/runwam.F90 +++ b/src/ecwam/runwam.F90 @@ -102,7 +102,7 @@ SUBROUTINE RUNWAM USE YOWMPP , ONLY : IRANK ,NPROC USE YOWSTAT , ONLY : CDATEE ,CDTPRO , & & IPROPAGS ,LSUBGRID ,IREFRA ,IDELPRO, TIME_PHYS, & - & TIME_PROPAG, TIME_OFFLOAD + & TIME_PROPAG, MODEL_TIME USE YOWWAMI , ONLY : CBPLTDT ,CEPLTDT USE YOWALTAS , ONLY : LODBRALT USE MPL_MODULE, ONLY : MPL_INIT, MPL_END, MPL_COMM @@ -390,13 +390,13 @@ SUBROUTINE RUNWAM WRITE (IU06,'(A)') ' ++++++++++++++++++++++++++++++' WRITE (IU06,'(A)') ' + TOTAL USER TIME IN SECONDS +' WRITE (IU06,'(A,F18.2,A)') ' + ', time, ' +' + WRITE (IU06,'(A)') ' + MODEL TIME +' + WRITE (IU06,'(A,F18.2,A)') ' + ', MODEL_TIME, ' +' WRITE (IU06,'(A)') ' + WAVE PROPAGATION TIME +' WRITE (IU06,'(A,F18.2,A)') ' + ', TIME_PROPAG, ' +' #if defined(WAM_GPU) WRITE (IU06,'(A)') ' + SOURCE TERM TIME +' WRITE (IU06,'(A,F18.2,A)') ' + ', TIME_PHYS, ' +' - WRITE (IU06,'(A)') ' + DATA OFFLOAD TIME +' - WRITE (IU06,'(A,F18.2,A)') ' + ', TIME_OFFLOAD, ' +' #else WRITE (IU06,'(A)') ' + SOURCE TERM TIME +' WRITE (IU06,'(A,F18.2,A)') ' + ', TIME_PHYS, ' +' @@ -408,13 +408,13 @@ SUBROUTINE RUNWAM WRITE (6,'(A)') ' ++++++++++++++++++++++++++++++' WRITE (6,'(A)') ' + TOTAL USER TIME IN SECONDS +' WRITE (6,'(A,F18.2,A)') ' + ', time, ' +' + WRITE (6,'(A)') ' + MODEL TIME +' + WRITE (6,'(A,F18.2,A)') ' + ', MODEL_TIME, ' +' WRITE (6,'(A)') ' + WAVE PROPAGATION TIME +' WRITE (6,'(A,F18.2,A)') ' + ', TIME_PROPAG, ' +' #if defined(WAM_GPU) WRITE (6,'(A)') ' + SOURCE TERM TIME +' WRITE (6,'(A,F18.2,A)') ' + ', TIME_PHYS, ' +' - WRITE (6,'(A)') ' + DATA OFFLOAD TIME +' - WRITE (6,'(A,F18.2,A)') ' + ', TIME_OFFLOAD, ' +' #else WRITE (6,'(A)') ' + SOURCE TERM TIME +' WRITE (6,'(A,F18.2,A)') ' + ', TIME_PHYS, ' +' diff --git a/src/ecwam/wamodel.F90 b/src/ecwam/wamodel.F90 index f1af4e21..ecf70bd0 100644 --- a/src/ecwam/wamodel.F90 +++ b/src/ecwam/wamodel.F90 @@ -74,7 +74,8 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & & IDELWI ,IREST ,IDELRES ,IDELINT , & & CDTBC ,IDELBC , & & IASSI ,MARSTYPE , & - & LLSOURCE ,LANAONLY ,LFRSTFLD ,IREFDATE, LUPDATE_GPU_GLOBALS + & LLSOURCE ,LANAONLY ,LFRSTFLD ,IREFDATE, LUPDATE_GPU_GLOBALS, & + & MODEL_TIME USE YOWSPEC, ONLY : NBLKS ,NBLKE USE YOWTEST , ONLY : IU06 USE YOWTEXT , ONLY : ICPLEN ,CPATH ,CWI ,LRESTARTED @@ -112,6 +113,7 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & #include "updnemofields.intfb.h" #include "updnemostress.intfb.h" #include "writsta.intfb.h" +#include "wam_user_clock.intfb.h" #ifdef WAM_GPU #include "outbs_loki_gpu.intfb.h" @@ -140,6 +142,7 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & INTEGER(KIND=JWIM) :: JSTPNEMO, IDATE, ITIME INTEGER(KIND=JWIM) :: IU04 TYPE(MIJ_TYPE) :: MIJ + REAL(KIND=JWRB) :: TIME0 REAL(KIND=JPHOOK) :: ZHOOK_HANDLE REAL(KIND=JWRB), DIMENSION(NPROMA_WAM, NIPRMOUT, NCHNK) :: BOUT @@ -202,6 +205,7 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & !* 1. ADVECTION/PHYSICS TIME LOOP. ! ---------------------------- + TIME0=-WAM_USER_CLOCK() #ifdef WAM_GPU CALL WVPRPT_LAND%SYNC_DEVICE_RDONLY(QUEUE=0) CALL VARS_4D%F_FL1%SYNC_DEVICE_RDWR(QUEUE=0) @@ -632,6 +636,7 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & #endif IF(MIJ%LALLOC) CALL MIJ%DEALLOC() + MODEL_TIME = MODEL_TIME + (TIME0+WAM_USER_CLOCK())*1.E-06 IF (LHOOK) CALL DR_HOOK('WAMODEL',1,ZHOOK_HANDLE) diff --git a/src/ecwam/yowstat.F90 b/src/ecwam/yowstat.F90 index 48644b04..304e415f 100644 --- a/src/ecwam/yowstat.F90 +++ b/src/ecwam/yowstat.F90 @@ -93,7 +93,7 @@ MODULE YOWSTAT REAL(KIND=JWRB) :: TIME_PROPAG = 0._JWRB REAL(KIND=JWRB) :: TIME_PHYS = 0._JWRB - REAL(KIND=JWRB) :: TIME_OFFLOAD = 0._JWRB + REAL(KIND=JWRB) :: MODEL_TIME = 0._JWRB !* VARIABLE. TYPE. PURPOSE. ! --------- ------- -------- From 0ebda17bde559f724af9eaf16b95e6aaa1ec1bb8 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Thu, 23 May 2024 13:17:52 +0000 Subject: [PATCH 08/17] Allocate MIJ and XLLWS only once --- src/ecwam/wamodel.F90 | 5 +---- src/ecwam/wvalloc.F90 | 7 +++++-- src/ecwam/wvdealloc.F90 | 4 +++- src/ecwam/yowspec.F90 | 3 ++- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/ecwam/wamodel.F90 b/src/ecwam/wamodel.F90 index ecf70bd0..46ebe286 100644 --- a/src/ecwam/wamodel.F90 +++ b/src/ecwam/wamodel.F90 @@ -76,7 +76,7 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & & IASSI ,MARSTYPE , & & LLSOURCE ,LANAONLY ,LFRSTFLD ,IREFDATE, LUPDATE_GPU_GLOBALS, & & MODEL_TIME - USE YOWSPEC, ONLY : NBLKS ,NBLKE + USE YOWSPEC, ONLY : NBLKS ,NBLKE, MIJ USE YOWTEST , ONLY : IU06 USE YOWTEXT , ONLY : ICPLEN ,CPATH ,CWI ,LRESTARTED USE YOWUNIT , ONLY : IU02 ,IU19 ,IU20 @@ -141,7 +141,6 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & INTEGER(KIND=JWIM) :: ICHNK INTEGER(KIND=JWIM) :: JSTPNEMO, IDATE, ITIME INTEGER(KIND=JWIM) :: IU04 - TYPE(MIJ_TYPE) :: MIJ REAL(KIND=JWRB) :: TIME0 REAL(KIND=JPHOOK) :: ZHOOK_HANDLE @@ -200,7 +199,6 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & ENDIF - IF(.NOT. MIJ%LALLOC) CALL MIJ%ALLOC(UBOUNDS=[NPROMA_WAM, NCHNK]) !* 1. ADVECTION/PHYSICS TIME LOOP. ! ---------------------------- @@ -635,7 +633,6 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & CALL BLK2GLO%SYNC_HOST_RDWR() #endif - IF(MIJ%LALLOC) CALL MIJ%DEALLOC() MODEL_TIME = MODEL_TIME + (TIME0+WAM_USER_CLOCK())*1.E-06 IF (LHOOK) CALL DR_HOOK('WAMODEL',1,ZHOOK_HANDLE) diff --git a/src/ecwam/wvalloc.F90 b/src/ecwam/wvalloc.F90 index f48023d2..53a9693e 100644 --- a/src/ecwam/wvalloc.F90 +++ b/src/ecwam/wvalloc.F90 @@ -22,7 +22,7 @@ SUBROUTINE WVALLOC USE YOWPARAM , ONLY : NANG ,NFRE USE YOWPCONS , ONLY : ZMISS USE YOWSHAL , ONLY : WVPRPT - USE YOWSPEC , ONLY : FF_NOW ,VARS_4D + USE YOWSPEC , ONLY : FF_NOW ,VARS_4D, MIJ USE YOWWIND , ONLY : FF_NEXT USE YOWNEMOFLDS , ONLY : WAM2NEMO, NEMO2WAM @@ -57,7 +57,10 @@ SUBROUTINE WVALLOC VARS_4D%FL1(:,:,:,:) = 0.0_JWRB ENDIF - + IF(.NOT. MIJ%LALLOC)THEN + CALL MIJ%ALLOC(UBOUNDS=[NPROMA_WAM, NCHNK]) + MIJ%PTR(:,:) = 0.0_JWRB + ENDIF IF (.NOT. INTFLDS%LALLOC) THEN CALL INTFLDS%ALLOC(UBOUNDS=[NPROMA_WAM, NCHNK]) diff --git a/src/ecwam/wvdealloc.F90 b/src/ecwam/wvdealloc.F90 index 64dbb1a0..8c3c3a1c 100644 --- a/src/ecwam/wvdealloc.F90 +++ b/src/ecwam/wvdealloc.F90 @@ -28,7 +28,7 @@ SUBROUTINE WVDEALLOC USE YOWMEAN , ONLY : INTFLDS USE YOWWIND , ONLY : FF_NEXT USE YOWGRID , ONLY : NCHNK - USE YOWSPEC , ONLY : FF_NOW ,VARS_4D + USE YOWSPEC , ONLY : FF_NOW ,VARS_4D, MIJ USE YOWSHAL , ONLY : WVPRPT USE YOWFRED , ONLY : WVPRPT_LAND @@ -82,6 +82,8 @@ SUBROUTINE WVDEALLOC ENDIF ENDIF + IF(MIJ%LALLOC) CALL MIJ%DEALLOC() + IF (LHOOK) CALL DR_HOOK('WVDEALLOC',1,ZHOOK_HANDLE) END SUBROUTINE WVDEALLOC diff --git a/src/ecwam/yowspec.F90 b/src/ecwam/yowspec.F90 index e2d705bd..718211f6 100644 --- a/src/ecwam/yowspec.F90 +++ b/src/ecwam/yowspec.F90 @@ -10,7 +10,7 @@ MODULE YOWSPEC USE PARKIND_WAVE, ONLY : JWIM, JWRB, JWRU - USE YOWDRVTYPE, ONLY : FORCING_FIELDS, TYPE_4D + USE YOWDRVTYPE, ONLY : FORCING_FIELDS, TYPE_4D, MIJ_TYPE IMPLICIT NONE @@ -34,6 +34,7 @@ MODULE YOWSPEC TYPE(FORCING_FIELDS) :: FF_NOW TYPE(TYPE_4D) :: VARS_4D + TYPE(MIJ_TYPE) :: MIJ ! *NSTART* INDEX OF THE FIRST POINT OF THE COMPUTATION SUB GRID DOMAIN ! *NEND* INDEX OF THE LAST POINT OF THE COMPUTATION SUB GRID DOMAIN From 11a679c2dccdeaf7fa25d51ec06dfd7177c122b5 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Thu, 23 May 2024 14:22:48 +0000 Subject: [PATCH 09/17] Delete GPU allocations at the end of WAMODEL --- src/ecwam/drvtype_mod.fypp | 12 ++++++++++++ src/ecwam/wamodel.F90 | 31 +++++++++++++++++++++---------- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/src/ecwam/drvtype_mod.fypp b/src/ecwam/drvtype_mod.fypp index 9928fd45..b46b5787 100644 --- a/src/ecwam/drvtype_mod.fypp +++ b/src/ecwam/drvtype_mod.fypp @@ -57,6 +57,7 @@ MODULE ${obj.upper()}$_TYPE_MOD PROCEDURE :: GET_DEVICE_DATA_RDONLY => ${obj.upper()}$_GET_DEVICE_DATA_RDONLY PROCEDURE :: GET_HOST_DATA_RDWR => ${obj.upper()}$_GET_HOST_DATA_RDWR PROCEDURE :: GET_HOST_DATA_RDONLY => ${obj.upper()}$_GET_HOST_DATA_RDONLY + PROCEDURE :: DELETE_DEVICE_DATA => ${obj.upper()}$_DELETE_DEVICE_DATA #:endif END TYPE ${obj.upper()}$ @@ -418,6 +419,17 @@ MODULE ${obj.upper()}$_TYPE_MOD ENDIF END SUBROUTINE ${obj.upper()}$_SYNC_HOST_RDWR + + SUBROUTINE ${obj.upper()}$_DELETE_DEVICE_DATA(SELF) + CLASS(${obj.upper()}$) :: SELF + + #:for _, vars in zip(_def['types'], _def['vars']) + #:for var in vars + CALL SELF%F_${var.upper()}$%DELETE_DEVICE_DATA() + #:endfor + #:endfor + + END SUBROUTINE ${obj.upper()}$_DELETE_DEVICE_DATA #:endif END MODULE ${obj.upper()}$_TYPE_MOD diff --git a/src/ecwam/wamodel.F90 b/src/ecwam/wamodel.F90 index 46ebe286..e2f71b7c 100644 --- a/src/ecwam/wamodel.F90 +++ b/src/ecwam/wamodel.F90 @@ -621,16 +621,27 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & ENDDO ADVECTION #ifdef WAM_GPU - CALL WVPRPT_LAND%SYNC_HOST_RDWR() - CALL WVPRPT%SYNC_HOST_RDWR() - CALL WVENVI%SYNC_HOST_RDWR() - CALL FF_NOW%SYNC_HOST_RDWR() - CALL FF_NEXT%SYNC_HOST_RDWR() - CALL WAM2NEMO%SYNC_HOST_RDWR() - CALL INTFLDS%SYNC_HOST_RDWR() - CALL VARS_4D%SYNC_HOST_RDWR() - CALL MIJ%SYNC_HOST_RDWR() - CALL BLK2GLO%SYNC_HOST_RDWR() + CALL WVPRPT_LAND%GET_HOST_DATA_RDWR() + CALL WVPRPT%GET_HOST_DATA_RDWR() + CALL WVENVI%GET_HOST_DATA_RDWR() + CALL FF_NOW%GET_HOST_DATA_RDWR() + CALL FF_NEXT%GET_HOST_DATA_RDWR() + CALL WAM2NEMO%GET_HOST_DATA_RDWR() + CALL INTFLDS%GET_HOST_DATA_RDWR() + CALL VARS_4D%GET_HOST_DATA_RDWR() + CALL MIJ%GET_HOST_DATA_RDWR() + CALL BLK2GLO%GET_HOST_DATA_RDWR() + + CALL WVPRPT_LAND%DELETE_DEVICE_DATA() + CALL WVPRPT%DELETE_DEVICE_DATA() + CALL WVENVI%DELETE_DEVICE_DATA() + CALL FF_NOW%DELETE_DEVICE_DATA() + CALL FF_NEXT%DELETE_DEVICE_DATA() + CALL WAM2NEMO%DELETE_DEVICE_DATA() + CALL INTFLDS%DELETE_DEVICE_DATA() + CALL VARS_4D%DELETE_DEVICE_DATA() + CALL MIJ%DELETE_DEVICE_DATA() + CALL BLK2GLO%DELETE_DEVICE_DATA() #endif MODEL_TIME = MODEL_TIME + (TIME0+WAM_USER_CLOCK())*1.E-06 From ebb01164bba0bb2f1dbe1a9b011e5514df476c61 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Fri, 24 May 2024 11:39:03 +0000 Subject: [PATCH 10/17] CUDA: enable pinning of fields --- CMakeLists.txt | 12 +++++++++++- README.md | 3 ++- cmake/ecwam_expand_drv_types.cmake | 2 +- package/bundle/bundle.yml | 5 +++++ src/ecwam/CMakeLists.txt | 6 +++++- src/ecwam/drvtype_mod.fypp | 2 +- src/ecwam/wvalloc.F90 | 6 ++++++ 7 files changed, 31 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c7d1dbc7..f38232bf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -135,7 +135,17 @@ ecbuild_add_option( FEATURE ACC REQUIRED_PACKAGES "OpenACC COMPONENTS Fortran" CONDITION HAVE_LOKI AND NOT LOKI_MODE MATCHES "idem|idem-stack" ) -### CUDA-aware MPI +### CUDA +include(CheckLanguage) +check_language(CUDA) +ecbuild_add_option( FEATURE CUDA + DESCRIPTION "CUDA" DEFAULT OFF + CONDITION CMAKE_CUDA_COMPILER AND HAVE_ACC ) +if( HAVE_CUDA ) + enable_language( CUDA ) +endif() + +### GPU-aware MPI ecbuild_add_option( FEATURE GPU_AWARE_MPI DEFAULT OFF DESCRIPTION "Enable GPU-aware MPI" diff --git a/README.md b/README.md index 83a2ab0d..6c56b0fd 100644 --- a/README.md +++ b/README.md @@ -232,7 +232,8 @@ Building The recommended option for building the GPU enabled ecWAM is to use the provided bundle, and pass the `--with-loki --with-acc` options. Different Loki transformations can also be chosen at build-time via the following bundle option: `--loki-mode=`. Direct GPU-to-GPU MPI communications can be enabled by passing the -`--with-gpu-aware-mpi` option. +`--with-gpu-aware-mpi` option. CPU to GPU data transfers can be accelerated (via pinning of host-side allocations) +by building with the `--with-cuda` option. The ecwam-bundle also provides appropriate arch files for the nvhpc suite on the ECMWF ATOS system. diff --git a/cmake/ecwam_expand_drv_types.cmake b/cmake/ecwam_expand_drv_types.cmake index 89e29561..6006c4d6 100644 --- a/cmake/ecwam_expand_drv_types.cmake +++ b/cmake/ecwam_expand_drv_types.cmake @@ -18,7 +18,7 @@ macro( ecwam_expand_drv_types ) list(APPEND FYPP_ARGS -DPARKIND1_SINGLE_NEMO) endif() - if( HAVE_ACC ) + if( HAVE_LOKI AND NOT LOKI_MODE MATCHES "idem|idem-stack" ) list(APPEND FYPP_ARGS -DWAM_GPU) endif() diff --git a/package/bundle/bundle.yml b/package/bundle/bundle.yml index a882fc08..e240bfab 100644 --- a/package/bundle/bundle.yml +++ b/package/bundle/bundle.yml @@ -75,6 +75,11 @@ options : cmake : > ENABLE_ACC=ON + - with-cuda : + help : Enable FIELD_API CUDA backend + cmake : > + ENABLE_CUDA=ON + - without-loki-install : help : Skip installation of Loki (Requires Loki to be on the PATH) cmake : > diff --git a/src/ecwam/CMakeLists.txt b/src/ecwam/CMakeLists.txt index 7a62ef68..0df935b9 100644 --- a/src/ecwam/CMakeLists.txt +++ b/src/ecwam/CMakeLists.txt @@ -446,7 +446,7 @@ ecbuild_add_library( $<${HAVE_ACC}:OpenACC::OpenACC_Fortran> PUBLIC_INCLUDES $ PRIVATE_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE_DEFINITIONS ${ECWAM_PRIVATE_DEFINITIONS} + PRIVATE_DEFINITIONS ${ECWAM_PRIVATE_DEFINITIONS} $<${HAVE_CUDA}:_CUDA> PUBLIC_DEFINITIONS ${ECWAM_DEFINITIONS} ) @@ -461,6 +461,10 @@ if( HAVE_ACC AND CMAKE_Fortran_COMPILER_ID MATCHES "PGI|NVHPC") target_compile_options( ${ecwam} PRIVATE "-gpu=maxregcount:128" ) endif() +if( HAVE_CUDA ) + target_link_options( ${ecwam} PUBLIC "-cuda;-gpu=pinned" ) +endif() + ecwam_target_compile_definitions_FILENAME( ${ecwam} ) ### The file mubuf.F90, which is only used for "preproc" is sensitive to optimisations diff --git a/src/ecwam/drvtype_mod.fypp b/src/ecwam/drvtype_mod.fypp index b46b5787..2b25b4e4 100644 --- a/src/ecwam/drvtype_mod.fypp +++ b/src/ecwam/drvtype_mod.fypp @@ -419,7 +419,7 @@ MODULE ${obj.upper()}$_TYPE_MOD ENDIF END SUBROUTINE ${obj.upper()}$_SYNC_HOST_RDWR - + SUBROUTINE ${obj.upper()}$_DELETE_DEVICE_DATA(SELF) CLASS(${obj.upper()}$) :: SELF diff --git a/src/ecwam/wvalloc.F90 b/src/ecwam/wvalloc.F90 index 53a9693e..96b8ba20 100644 --- a/src/ecwam/wvalloc.F90 +++ b/src/ecwam/wvalloc.F90 @@ -28,6 +28,7 @@ SUBROUTINE WVALLOC USE YOWNEMOFLDS , ONLY : WAM2NEMO, NEMO2WAM USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK + USE FIELD_DEFAULTS_MODULE, ONLY : INIT_PINNED_VALUE ! ---------------------------------------------------------------------- @@ -44,6 +45,11 @@ SUBROUTINE WVALLOC ! 1. ALLOCATE NECESSARY ARRAYS ! ------------------------- +#ifdef _CUDA +!.... Enable pinning of fields in page-locked memory + INIT_PINNED_VALUE=.TRUE. +#endif + IF (.NOT. WVPRPT%LALLOC)THEN CALL WVPRPT%ALLOC(UBOUNDS=[NPROMA_WAM, NFRE, NCHNK]) ENDIF From 15d8296e34b8a0cf7f602ea40d4261a138c88762 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Tue, 28 May 2024 14:30:42 +0000 Subject: [PATCH 11/17] TIMINGS: add timers for MPI,I/O --- src/ecwam/proenvhalo.F90 | 5 +++++ src/ecwam/propag_wam.F90 | 8 +++++++- src/ecwam/runwam.F90 | 10 +++++++++- src/ecwam/wamodel.F90 | 8 ++++++-- src/ecwam/yowstat.F90 | 2 ++ 5 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/ecwam/proenvhalo.F90 b/src/ecwam/proenvhalo.F90 index 939a1585..9ecc6ca8 100644 --- a/src/ecwam/proenvhalo.F90 +++ b/src/ecwam/proenvhalo.F90 @@ -30,6 +30,7 @@ SUBROUTINE PROENVHALO (NINF, NSUP, & USE YOWGRID , ONLY : NPROMA_WAM, NCHNK, KIJL4CHNK, IJFROMCHNK USE YOWPARAM , ONLY : NFRE , NFRE_RED USE YOWSHAL , ONLY : BATHYMAX + USE YOWSTAT , ONLY : MPI_TIME USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK USE YOWDRVTYPE, ONLY: ENVIRONMENT, FREQUENCY @@ -39,6 +40,7 @@ SUBROUTINE PROENVHALO (NINF, NSUP, & IMPLICIT NONE #include "mpexchng.intfb.h" +#include "wam_user_clock.intfb.h" INTEGER(KIND=JWIM), INTENT(IN) :: NINF, NSUP ! HALO EXTEND NINF to NSUP+1 @@ -51,6 +53,7 @@ SUBROUTINE PROENVHALO (NINF, NSUP, & INTEGER(KIND=JWIM) :: ICHNK, KIJS, KIJL, IJSB, IJLB REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + REAL(KIND=JWRB) :: TIME0 ! ---------------------------------------------------------------------- @@ -92,7 +95,9 @@ SUBROUTINE PROENVHALO (NINF, NSUP, & #ifdef WAM_GPU CALL WVPRPT_LAND%GET_DEVICE_DATA_RDONLY() #endif + TIME0=-WAM_USER_CLOCK() CALL MPEXCHNG(BUFFER_EXT, 3*NFRE_RED+5, 1, 1) + MPI_TIME = MPI_TIME + (TIME0+WAM_USER_CLOCK())*1.E-06 !$acc kernels present(WVPRPT_LAND) BUFFER_EXT(NSUP+1,1:NFRE_RED) = WVPRPT_LAND%WAVNUM(1:NFRE_RED) BUFFER_EXT(NSUP+1,NFRE_RED+1:2*NFRE_RED) = WVPRPT_LAND%CGROUP(1:NFRE_RED) diff --git a/src/ecwam/propag_wam.F90 b/src/ecwam/propag_wam.F90 index 3a0dd910..ee5d6e73 100644 --- a/src/ecwam/propag_wam.F90 +++ b/src/ecwam/propag_wam.F90 @@ -47,7 +47,7 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WAVNUM, CGROUP, OMOSNH2KD, FL1, & USE YOWMPP , ONLY : NINF ,NSUP USE YOWPARAM , ONLY : NANG ,NFRE ,NFRE_RED ,LLUNSTR USE YOWREFD , ONLY : LLUPDTTD ,THDD ,THDC ,SDOT - USE YOWSTAT , ONLY : IPROPAGS ,IFRELFMAX, DELPRO_LF, IDELPRO + USE YOWSTAT , ONLY : IPROPAGS ,IFRELFMAX, DELPRO_LF, IDELPRO, MPI_TIME USE YOWUBUF , ONLY : LUPDTWGHT #ifdef WAM_HAVE_UNWAM USE UNWAM , ONLY : PROPAG_UNWAM @@ -70,6 +70,7 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WAVNUM, CGROUP, OMOSNH2KD, FL1, & #include "propags1.intfb.h" #include "propags2.intfb.h" #include "propdot.intfb.h" +#include "wam_user_clock.intfb.h" TYPE(WVGRIDGLO), INTENT(IN) :: BLK2GLO REAL(KIND=JWRB), DIMENSION(NPROMA_WAM, NANG, NFRE, NCHNK), INTENT(INOUT) :: FL1 @@ -83,6 +84,7 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WAVNUM, CGROUP, OMOSNH2KD, FL1, & INTEGER(KIND=JWIM) :: IJSG, IJLG, ICHNK, KIJS, KIJL, IJSB, IJLB REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + REAL(KIND=JWRB) :: TIME0 ! Spectra extended with the halo exchange for the propagation ! But limited to NFRE_RED frequencies @@ -159,7 +161,9 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WAVNUM, CGROUP, OMOSNH2KD, FL1, & ! OBTAIN INFORMATION AT NEIGHBORING GRID POINTS (HALO) ! ---------------------------------------------------- + TIME0=-WAM_USER_CLOCK() CALL MPEXCHNG(FL1_EXT, NANG, 1, NFRE_RED) + MPI_TIME = MPI_TIME + (TIME0+WAM_USER_CLOCK())*1.E-06 CALL GSTATS(1430,0) @@ -267,7 +271,9 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WAVNUM, CGROUP, OMOSNH2KD, FL1, & !$OMP END PARALLEL DO #endif /*_OPENACC*/ + TIME0=-WAM_USER_CLOCK() CALL MPEXCHNG(FL1_EXT(:,:,1:IFRELFMAX), NANG, 1, IFRELFMAX) + MPI_TIME = MPI_TIME + (TIME0+WAM_USER_CLOCK())*1.E-06 #ifndef _OPENACC !$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(JKGLO, KIJS, KIJL) diff --git a/src/ecwam/runwam.F90 b/src/ecwam/runwam.F90 index 61eedd53..ae95a79f 100644 --- a/src/ecwam/runwam.F90 +++ b/src/ecwam/runwam.F90 @@ -102,7 +102,7 @@ SUBROUTINE RUNWAM USE YOWMPP , ONLY : IRANK ,NPROC USE YOWSTAT , ONLY : CDATEE ,CDTPRO , & & IPROPAGS ,LSUBGRID ,IREFRA ,IDELPRO, TIME_PHYS, & - & TIME_PROPAG, MODEL_TIME + & TIME_PROPAG, MODEL_TIME, IO_TIME, MPI_TIME USE YOWWAMI , ONLY : CBPLTDT ,CEPLTDT USE YOWALTAS , ONLY : LODBRALT USE MPL_MODULE, ONLY : MPL_INIT, MPL_END, MPL_COMM @@ -392,6 +392,10 @@ SUBROUTINE RUNWAM WRITE (IU06,'(A,F18.2,A)') ' + ', time, ' +' WRITE (IU06,'(A)') ' + MODEL TIME +' WRITE (IU06,'(A,F18.2,A)') ' + ', MODEL_TIME, ' +' + WRITE (IU06,'(A)') ' + IO TIME +' + WRITE (IU06,'(A,F18.2,A)') ' + ', IO_TIME, ' +' + WRITE (IU06,'(A)') ' + MPI TIME +' + WRITE (IU06,'(A,F18.2,A)') ' + ', MPI_TIME, ' +' WRITE (IU06,'(A)') ' + WAVE PROPAGATION TIME +' WRITE (IU06,'(A,F18.2,A)') ' + ', TIME_PROPAG, ' +' #if defined(WAM_GPU) @@ -410,6 +414,10 @@ SUBROUTINE RUNWAM WRITE (6,'(A,F18.2,A)') ' + ', time, ' +' WRITE (6,'(A)') ' + MODEL TIME +' WRITE (6,'(A,F18.2,A)') ' + ', MODEL_TIME, ' +' + WRITE (6,'(A)') ' + IO TIME +' + WRITE (6,'(A,F18.2,A)') ' + ', IO_TIME, ' +' + WRITE (6,'(A)') ' + MPI TIME +' + WRITE (6,'(A,F18.2,A)') ' + ', MPI_TIME, ' +' WRITE (6,'(A)') ' + WAVE PROPAGATION TIME +' WRITE (6,'(A,F18.2,A)') ' + ', TIME_PROPAG, ' +' #if defined(WAM_GPU) diff --git a/src/ecwam/wamodel.F90 b/src/ecwam/wamodel.F90 index e2f71b7c..4b6cf93f 100644 --- a/src/ecwam/wamodel.F90 +++ b/src/ecwam/wamodel.F90 @@ -75,7 +75,7 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & & CDTBC ,IDELBC , & & IASSI ,MARSTYPE , & & LLSOURCE ,LANAONLY ,LFRSTFLD ,IREFDATE, LUPDATE_GPU_GLOBALS, & - & MODEL_TIME + & MODEL_TIME, IO_TIME USE YOWSPEC, ONLY : NBLKS ,NBLKE, MIJ USE YOWTEST , ONLY : IU06 USE YOWTEXT , ONLY : ICPLEN ,CPATH ,CWI ,LRESTARTED @@ -141,7 +141,7 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & INTEGER(KIND=JWIM) :: ICHNK INTEGER(KIND=JWIM) :: JSTPNEMO, IDATE, ITIME INTEGER(KIND=JWIM) :: IU04 - REAL(KIND=JWRB) :: TIME0 + REAL(KIND=JWRB) :: TIME0, TIME1 REAL(KIND=JPHOOK) :: ZHOOK_HANDLE REAL(KIND=JWRB), DIMENSION(NPROMA_WAM, NIPRMOUT, NCHNK) :: BOUT @@ -441,6 +441,7 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & & EMAXDPT=.TRUE., IOBND=.TRUE., IODP=.TRUE.) #endif + TIME1=-WAM_USER_CLOCK() CALL SAVSTRESS(WVENVI, FF_NOW, NBLKS, NBLKE, CDTPRO, CDATEF) WRITE(IU06,*) ' ' WRITE(IU06,*) ' BINARY STRESS FILE DISPOSED AT........ CDTPRO = ', CDTPRO @@ -450,6 +451,7 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & WRITE(IU06,*) ' BINARY WAVE SPECTRA DISPOSED AT........ CDTPRO = ', CDTPRO WRITE(IU06,*) ' ' CALL FLUSH(IU06) + IO_TIME = IO_TIME + (TIME1+WAM_USER_CLOCK())*1.E-06 ENDIF @@ -519,7 +521,9 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & MARSTYPE='an' ENDIF + TIME1=-WAM_USER_CLOCK() CALL OUTWINT(BOUT) + IO_TIME = IO_TIME + (TIME1+WAM_USER_CLOCK())*1.E-06 LLFLUSH = .TRUE. MARSTYPE=MARSTYPEBAK diff --git a/src/ecwam/yowstat.F90 b/src/ecwam/yowstat.F90 index 304e415f..4ddfe308 100644 --- a/src/ecwam/yowstat.F90 +++ b/src/ecwam/yowstat.F90 @@ -94,6 +94,8 @@ MODULE YOWSTAT REAL(KIND=JWRB) :: TIME_PROPAG = 0._JWRB REAL(KIND=JWRB) :: TIME_PHYS = 0._JWRB REAL(KIND=JWRB) :: MODEL_TIME = 0._JWRB + REAL(KIND=JWRB) :: IO_TIME = 0._JWRB + REAL(KIND=JWRB) :: MPI_TIME = 0._JWRB !* VARIABLE. TYPE. PURPOSE. ! --------- ------- -------- From eda0ccf4a829022b26f1c6b3a9c70f9955646081 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Thu, 29 Aug 2024 08:35:23 +0000 Subject: [PATCH 12/17] LOKI-SCC-STACK: normalise LBOUNDS in FNDPRT and inline PEAKFRI --- src/ecwam/ecwam_loki_gpu.config | 4 ++-- src/ecwam/fndprt.F90 | 10 +++++----- src/ecwam/peakfri.F90 | 8 ++++---- src/ecwam/wdirspread.F90 | 1 + 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/ecwam/ecwam_loki_gpu.config b/src/ecwam/ecwam_loki_gpu.config index 57468d5c..e6b6e1f9 100644 --- a/src/ecwam/ecwam_loki_gpu.config +++ b/src/ecwam/ecwam_loki_gpu.config @@ -67,10 +67,10 @@ block = ['ec_parkind', 'parkind_wave', 'yowdrvtype'] [routines.transf_r] [routines.transf_bfi] -# we add loki inlined routines here rather than the ignore list because we want them to be sanitised -# before inlining +# we add loki inlined routines here to force them to be created [routines.sebtmean] [routines.scosfl] +[routines.peakfri] # Disable replication for modules containing global variables [routines.yowaltas] diff --git a/src/ecwam/fndprt.F90 b/src/ecwam/fndprt.F90 index 83fe0f5c..e9fb6d5a 100644 --- a/src/ecwam/fndprt.F90 +++ b/src/ecwam/fndprt.F90 @@ -100,7 +100,7 @@ SUBROUTINE FNDPRT (KIJS, KIJL, NPMAX, & INTEGER(KIND=JWIM) :: NANGH, KK, KKMIN, KKMAX INTEGER(KIND=JWIM) :: IFRL, ITHL, ITHR INTEGER(KIND=JWIM), DIMENSION(KIJL) :: MMIN, MMAX - INTEGER(KIND=JWIM), DIMENSION(1-NANG:2*NANG) :: KLOC + INTEGER(KIND=JWIM), DIMENSION(3*NANG) :: KLOC REAL(KIND=JWRB) :: HALF_SECTOR REAL(KIND=JPHOOK) :: ZHOOK_HANDLE @@ -155,7 +155,7 @@ SUBROUTINE FNDPRT (KIJS, KIJL, NPMAX, & KKMIN=ITHC-NANGH KKMAX=ITHC+NANGH DO KK=KKMIN,KKMAX - KLOC(KK)=1+MOD(NANG+KK-1,NANG) + KLOC(KK+NANG)=1+MOD(NANG+KK-1,NANG) ENDDO !* 1. SET UP THE W2 MAP @@ -180,7 +180,7 @@ SUBROUTINE FNDPRT (KIJS, KIJL, NPMAX, & ! FIND IF MORE HIGH FREQUENCY BINS HAVE BECOME EXCLUDED OUT0: DO M=MMAX(IJ),MMIN(IJ),-1 DO KK=KKMIN,KKMAX - K=KLOC(KK) + K=KLOC(KK+NANG) IF (W1(IJ,K,M) < 1.0_JWRB) THEN MMAX(IJ)=M EXIT OUT0 @@ -205,7 +205,7 @@ SUBROUTINE FNDPRT (KIJS, KIJL, NPMAX, & ! by definition bins beyond M=MIJ are never extremas ! and bins above MMAX are excluded. DO KK=KKMIN,KKMAX - K = KLOC(KK) + K = KLOC(KK+NANG) IF (LLW3(IJ,K,M)) THEN IF (W2(K,M) == 0.5_JWRB .AND. & @@ -235,7 +235,7 @@ SUBROUTINE FNDPRT (KIJS, KIJL, NPMAX, & DO M=MMIN(IJ),MMAX(IJ) DO KK=KKMIN,KKMAX - K=KLOC(KK) + K=KLOC(KK+NANG) IF (LLW3(IJ,K,M) .AND. W1(IJ,K,M) < 1.0_JWRB) THEN IF (W2(K,M) == 0.0_JWRB) THEN diff --git a/src/ecwam/peakfri.F90 b/src/ecwam/peakfri.F90 index 3b7b0241..c6e3bec3 100644 --- a/src/ecwam/peakfri.F90 +++ b/src/ecwam/peakfri.F90 @@ -64,10 +64,10 @@ SUBROUTINE PEAKFRI (KIJS, KIJL, F, IPEAKF, EPEAKF, F1D) IMPLICIT NONE INTEGER(KIND=JWIM), INTENT(IN) :: KIJS, KIJL - REAL(KIND=JWRB), INTENT(IN) :: F(KIJS:KIJL,NANG,NFRE) - INTEGER(KIND=JWIM), INTENT(OUT) :: IPEAKF(KIJS:KIJL) - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL), INTENT(OUT) :: EPEAKF - REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,NFRE), INTENT(OUT) :: F1D + REAL(KIND=JWRB), INTENT(IN) :: F(KIJL,NANG,NFRE) + INTEGER(KIND=JWIM), INTENT(OUT) :: IPEAKF(KIJL) + REAL(KIND=JWRB), DIMENSION(KIJL), INTENT(OUT) :: EPEAKF + REAL(KIND=JWRB), DIMENSION(KIJL,NFRE), INTENT(OUT) :: F1D INTEGER(KIND=JWIM) :: IJ, K, M REAL(KIND=JPHOOK) :: ZHOOK_HANDLE diff --git a/src/ecwam/wdirspread.F90 b/src/ecwam/wdirspread.F90 index 8bd8568b..4e5529dd 100644 --- a/src/ecwam/wdirspread.F90 +++ b/src/ecwam/wdirspread.F90 @@ -107,6 +107,7 @@ SUBROUTINE WDIRSPREAD (KIJS, KIJL, F, EMEAN, LLPEAKF, WDIRSPRD) IF(LLPEAKF) THEN ! COMPUTATION IS BASED ON THE PEAK FREQUENCY + !$loki inline CALL PEAKFRI (KIJS, KIJL, F, IFRINDEX, TEMP, F1D) !$loki inline CALL SCOSFL (KIJS, KIJL, F, IFRINDEX, WDIRSPRD) From d4c9b34f9f473e2201dc8abd1333de6765440289 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Mon, 16 Sep 2024 12:49:18 +0000 Subject: [PATCH 13/17] Switch to DR_HOOK timers --- src/ecwam/outbs_loki_gpu.F90 | 6 +++--- src/ecwam/proenvhalo.F90 | 9 +++------ src/ecwam/propag_wam.F90 | 14 ++++++------- src/ecwam/runwam.F90 | 24 +--------------------- src/ecwam/wamintgr_loki_gpu.F90 | 12 ++++++++++- src/ecwam/wamodel.F90 | 36 +++++++++++++++++++++++---------- src/ecwam/yowstat.F90 | 3 --- 7 files changed, 49 insertions(+), 55 deletions(-) diff --git a/src/ecwam/outbs_loki_gpu.F90 b/src/ecwam/outbs_loki_gpu.F90 index 697cd7ed..f1135e59 100644 --- a/src/ecwam/outbs_loki_gpu.F90 +++ b/src/ecwam/outbs_loki_gpu.F90 @@ -78,7 +78,7 @@ SUBROUTINE OUTBS_LOKI_GPU (MIJ, FL1, XLLWS, & REAL(KIND=JWRB), DIMENSION(NPROMA_WAM, NIPRMOUT, NCHNK), INTENT(OUT) :: BOUT - INTEGER(KIND=JWIM) :: M, IJ, ICHNK, KIJS, KIJL + INTEGER(KIND=JWIM) :: ICHNK REAL(KIND=JPHOOK) :: ZHOOK_HANDLE @@ -86,7 +86,7 @@ SUBROUTINE OUTBS_LOKI_GPU (MIJ, FL1, XLLWS, & ! ---------------------------------------------------------------------- -IF (LHOOK) CALL DR_HOOK('OUTBS_LOKI_GPU',0,ZHOOK_HANDLE) +IF (LHOOK) CALL DR_HOOK('OUTBS',0,ZHOOK_HANDLE) !* 1. COMPUTE MEAN PARAMETERS. ! ------------------------ @@ -126,6 +126,6 @@ SUBROUTINE OUTBS_LOKI_GPU (MIJ, FL1, XLLWS, & IF (LLNORMWAMOUT) CALL OUTWNORM(LDREPROD, BOUT) -IF (LHOOK) CALL DR_HOOK('OUTBS_LOKI_GPU',1,ZHOOK_HANDLE) +IF (LHOOK) CALL DR_HOOK('OUTBS',1,ZHOOK_HANDLE) END SUBROUTINE OUTBS_LOKI_GPU diff --git a/src/ecwam/proenvhalo.F90 b/src/ecwam/proenvhalo.F90 index 9ecc6ca8..b5e27bff 100644 --- a/src/ecwam/proenvhalo.F90 +++ b/src/ecwam/proenvhalo.F90 @@ -30,7 +30,6 @@ SUBROUTINE PROENVHALO (NINF, NSUP, & USE YOWGRID , ONLY : NPROMA_WAM, NCHNK, KIJL4CHNK, IJFROMCHNK USE YOWPARAM , ONLY : NFRE , NFRE_RED USE YOWSHAL , ONLY : BATHYMAX - USE YOWSTAT , ONLY : MPI_TIME USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK USE YOWDRVTYPE, ONLY: ENVIRONMENT, FREQUENCY @@ -40,7 +39,6 @@ SUBROUTINE PROENVHALO (NINF, NSUP, & IMPLICIT NONE #include "mpexchng.intfb.h" -#include "wam_user_clock.intfb.h" INTEGER(KIND=JWIM), INTENT(IN) :: NINF, NSUP ! HALO EXTEND NINF to NSUP+1 @@ -52,8 +50,7 @@ SUBROUTINE PROENVHALO (NINF, NSUP, & INTEGER(KIND=JWIM) :: IJ, M INTEGER(KIND=JWIM) :: ICHNK, KIJS, KIJL, IJSB, IJLB - REAL(KIND=JPHOOK) :: ZHOOK_HANDLE - REAL(KIND=JWRB) :: TIME0 + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE, ZHOOK_HANDLE_MPI ! ---------------------------------------------------------------------- @@ -95,9 +92,9 @@ SUBROUTINE PROENVHALO (NINF, NSUP, & #ifdef WAM_GPU CALL WVPRPT_LAND%GET_DEVICE_DATA_RDONLY() #endif - TIME0=-WAM_USER_CLOCK() + IF (LHOOK) CALL DR_HOOK('MPI_TIME',0,ZHOOK_HANDLE_MPI) CALL MPEXCHNG(BUFFER_EXT, 3*NFRE_RED+5, 1, 1) - MPI_TIME = MPI_TIME + (TIME0+WAM_USER_CLOCK())*1.E-06 + IF (LHOOK) CALL DR_HOOK('MPI_TIME',1,ZHOOK_HANDLE_MPI) !$acc kernels present(WVPRPT_LAND) BUFFER_EXT(NSUP+1,1:NFRE_RED) = WVPRPT_LAND%WAVNUM(1:NFRE_RED) BUFFER_EXT(NSUP+1,NFRE_RED+1:2*NFRE_RED) = WVPRPT_LAND%CGROUP(1:NFRE_RED) diff --git a/src/ecwam/propag_wam.F90 b/src/ecwam/propag_wam.F90 index ee5d6e73..1984bcc7 100644 --- a/src/ecwam/propag_wam.F90 +++ b/src/ecwam/propag_wam.F90 @@ -47,7 +47,7 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WAVNUM, CGROUP, OMOSNH2KD, FL1, & USE YOWMPP , ONLY : NINF ,NSUP USE YOWPARAM , ONLY : NANG ,NFRE ,NFRE_RED ,LLUNSTR USE YOWREFD , ONLY : LLUPDTTD ,THDD ,THDC ,SDOT - USE YOWSTAT , ONLY : IPROPAGS ,IFRELFMAX, DELPRO_LF, IDELPRO, MPI_TIME + USE YOWSTAT , ONLY : IPROPAGS ,IFRELFMAX, DELPRO_LF, IDELPRO USE YOWUBUF , ONLY : LUPDTWGHT #ifdef WAM_HAVE_UNWAM USE UNWAM , ONLY : PROPAG_UNWAM @@ -70,7 +70,6 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WAVNUM, CGROUP, OMOSNH2KD, FL1, & #include "propags1.intfb.h" #include "propags2.intfb.h" #include "propdot.intfb.h" -#include "wam_user_clock.intfb.h" TYPE(WVGRIDGLO), INTENT(IN) :: BLK2GLO REAL(KIND=JWRB), DIMENSION(NPROMA_WAM, NANG, NFRE, NCHNK), INTENT(INOUT) :: FL1 @@ -83,8 +82,7 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WAVNUM, CGROUP, OMOSNH2KD, FL1, & INTEGER(KIND=JWIM) :: NSTEP_LF, ISUBST INTEGER(KIND=JWIM) :: IJSG, IJLG, ICHNK, KIJS, KIJL, IJSB, IJLB - REAL(KIND=JPHOOK) :: ZHOOK_HANDLE - REAL(KIND=JWRB) :: TIME0 + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE, ZHOOK_HANDLE_MPI ! Spectra extended with the halo exchange for the propagation ! But limited to NFRE_RED frequencies @@ -161,9 +159,9 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WAVNUM, CGROUP, OMOSNH2KD, FL1, & ! OBTAIN INFORMATION AT NEIGHBORING GRID POINTS (HALO) ! ---------------------------------------------------- - TIME0=-WAM_USER_CLOCK() + IF (LHOOK) CALL DR_HOOK('MPI_TIME',0,ZHOOK_HANDLE_MPI) CALL MPEXCHNG(FL1_EXT, NANG, 1, NFRE_RED) - MPI_TIME = MPI_TIME + (TIME0+WAM_USER_CLOCK())*1.E-06 + IF (LHOOK) CALL DR_HOOK('MPI_TIME',1,ZHOOK_HANDLE_MPI) CALL GSTATS(1430,0) @@ -271,9 +269,9 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WAVNUM, CGROUP, OMOSNH2KD, FL1, & !$OMP END PARALLEL DO #endif /*_OPENACC*/ - TIME0=-WAM_USER_CLOCK() + IF (LHOOK) CALL DR_HOOK('MPI_TIME',0,ZHOOK_HANDLE_MPI) CALL MPEXCHNG(FL1_EXT(:,:,1:IFRELFMAX), NANG, 1, IFRELFMAX) - MPI_TIME = MPI_TIME + (TIME0+WAM_USER_CLOCK())*1.E-06 + IF (LHOOK) CALL DR_HOOK('MPI_TIME',1,ZHOOK_HANDLE_MPI) #ifndef _OPENACC !$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(JKGLO, KIJS, KIJL) diff --git a/src/ecwam/runwam.F90 b/src/ecwam/runwam.F90 index ae95a79f..c5556260 100644 --- a/src/ecwam/runwam.F90 +++ b/src/ecwam/runwam.F90 @@ -102,7 +102,7 @@ SUBROUTINE RUNWAM USE YOWMPP , ONLY : IRANK ,NPROC USE YOWSTAT , ONLY : CDATEE ,CDTPRO , & & IPROPAGS ,LSUBGRID ,IREFRA ,IDELPRO, TIME_PHYS, & - & TIME_PROPAG, MODEL_TIME, IO_TIME, MPI_TIME + & TIME_PROPAG USE YOWWAMI , ONLY : CBPLTDT ,CEPLTDT USE YOWALTAS , ONLY : LODBRALT USE MPL_MODULE, ONLY : MPL_INIT, MPL_END, MPL_COMM @@ -390,21 +390,10 @@ SUBROUTINE RUNWAM WRITE (IU06,'(A)') ' ++++++++++++++++++++++++++++++' WRITE (IU06,'(A)') ' + TOTAL USER TIME IN SECONDS +' WRITE (IU06,'(A,F18.2,A)') ' + ', time, ' +' - WRITE (IU06,'(A)') ' + MODEL TIME +' - WRITE (IU06,'(A,F18.2,A)') ' + ', MODEL_TIME, ' +' - WRITE (IU06,'(A)') ' + IO TIME +' - WRITE (IU06,'(A,F18.2,A)') ' + ', IO_TIME, ' +' - WRITE (IU06,'(A)') ' + MPI TIME +' - WRITE (IU06,'(A,F18.2,A)') ' + ', MPI_TIME, ' +' WRITE (IU06,'(A)') ' + WAVE PROPAGATION TIME +' WRITE (IU06,'(A,F18.2,A)') ' + ', TIME_PROPAG, ' +' -#if defined(WAM_GPU) WRITE (IU06,'(A)') ' + SOURCE TERM TIME +' WRITE (IU06,'(A,F18.2,A)') ' + ', TIME_PHYS, ' +' -#else - WRITE (IU06,'(A)') ' + SOURCE TERM TIME +' - WRITE (IU06,'(A,F18.2,A)') ' + ', TIME_PHYS, ' +' -#endif WRITE (IU06,'(A)') ' + +' WRITE (IU06,'(A,I8,A)') ' + ON PE : ', IRANK, ' +' WRITE (IU06,'(A)') ' ++++++++++++++++++++++++++++++' @@ -412,21 +401,10 @@ SUBROUTINE RUNWAM WRITE (6,'(A)') ' ++++++++++++++++++++++++++++++' WRITE (6,'(A)') ' + TOTAL USER TIME IN SECONDS +' WRITE (6,'(A,F18.2,A)') ' + ', time, ' +' - WRITE (6,'(A)') ' + MODEL TIME +' - WRITE (6,'(A,F18.2,A)') ' + ', MODEL_TIME, ' +' - WRITE (6,'(A)') ' + IO TIME +' - WRITE (6,'(A,F18.2,A)') ' + ', IO_TIME, ' +' - WRITE (6,'(A)') ' + MPI TIME +' - WRITE (6,'(A,F18.2,A)') ' + ', MPI_TIME, ' +' WRITE (6,'(A)') ' + WAVE PROPAGATION TIME +' WRITE (6,'(A,F18.2,A)') ' + ', TIME_PROPAG, ' +' -#if defined(WAM_GPU) WRITE (6,'(A)') ' + SOURCE TERM TIME +' WRITE (6,'(A,F18.2,A)') ' + ', TIME_PHYS, ' +' -#else - WRITE (6,'(A)') ' + SOURCE TERM TIME +' - WRITE (6,'(A,F18.2,A)') ' + ', TIME_PHYS, ' +' -#endif WRITE (6,'(A)') ' + +' WRITE (6,'(A,I8,A)') ' + ON PE : ', IRANK, ' +' WRITE (6,'(A)') ' ++++++++++++++++++++++++++++++' diff --git a/src/ecwam/wamintgr_loki_gpu.F90 b/src/ecwam/wamintgr_loki_gpu.F90 index 0dd5620c..0695abef 100644 --- a/src/ecwam/wamintgr_loki_gpu.F90 +++ b/src/ecwam/wamintgr_loki_gpu.F90 @@ -78,7 +78,7 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & INTEGER(KIND=JWIM) :: ICHNK INTEGER(KIND=JWIM) :: IDELWH -REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE, ZHOOK_HANDLE_IMPLSCH, ZHOOK_HANDLE_DATA_OFFLOAD LOGICAL, SAVE :: LLNEWFILE @@ -96,6 +96,7 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & !$loki update_device ENDIF +IF (LHOOK) CALL DR_HOOK('DATA_OFFLOAD',0,ZHOOK_HANDLE_DATA_OFFLOAD) CALL WAIT_FOR_ASYNC_QUEUE(QUEUE=0) CALL VARS_4D%F_FL1%GET_DEVICE_DATA_RDWR(VARS_4D%FL1) !$acc enter data attach(VARS_4D%FL1) @@ -103,6 +104,7 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & CALL WVENVI%GET_DEVICE_DATA_RDWR(DEPTH=.TRUE., DELLAM1=.TRUE., COSPHM1=.TRUE., UCUR=.TRUE., VCUR=.TRUE., & & EMAXDPT=.TRUE., IOBND=.TRUE., IODP=.TRUE.) CALL BLK2GLO%GET_DEVICE_DATA_RDONLY() +IF (LHOOK) CALL DR_HOOK('DATA_OFFLOAD',1,ZHOOK_HANDLE_DATA_OFFLOAD) !$acc data present(VARS_4D, WVPRPT, WVENVI, BLK2GLO) @@ -117,6 +119,7 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & !* RETRIEVING NEW FORCING FIELDS IF NEEDED. ! ---------------------------------------- +IF (LHOOK) CALL DR_HOOK('DATA_OFFLOAD',0,ZHOOK_HANDLE_DATA_OFFLOAD) CALL WAIT_FOR_ASYNC_QUEUE(QUEUE=1) CALL FF_NOW%GET_DEVICE_DATA_RDWR(AIRD=.TRUE., WDWAVE=.TRUE., CICOVER=.TRUE., WSWAVE=.TRUE., & & WSTAR=.TRUE., UFRIC=.TRUE., TAUW=.TRUE., TAUWDIR=.TRUE., Z0M=.TRUE., Z0B=.TRUE., & @@ -124,6 +127,7 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & CALL FF_NEXT%GET_DEVICE_DATA_RDONLY(AIRD=.TRUE., WDWAVE=.TRUE., CICOVER=.TRUE., WSWAVE=.TRUE., & & WSTAR=.TRUE., UFRIC=.TRUE., TAUW=.TRUE., TAUWDIR=.TRUE., Z0M=.TRUE., Z0B=.TRUE., & & CHRNCK=.TRUE., CITHICK=.TRUE., USTRA=.TRUE., VSTRA=.TRUE.) +IF (LHOOK) CALL DR_HOOK('DATA_OFFLOAD',1,ZHOOK_HANDLE_DATA_OFFLOAD) CALL NEWWIND(CDTIMP, CDATEWH, LLNEWFILE, & & WVPRPT, FF_NOW, FF_NEXT) @@ -135,6 +139,7 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & CALL GSTATS(1431,0) IF (LLSOURCE) THEN + IF (LHOOK) CALL DR_HOOK('DATA_OFFLOAD',0,ZHOOK_HANDLE_DATA_OFFLOAD) CALL WAIT_FOR_ASYNC_QUEUE(QUEUE=2) CALL WAM2NEMO%GET_DEVICE_DATA_RDWR(NEMOUSTOKES=.TRUE., NEMOVSTOKES=.TRUE., NEMOSTRN=.TRUE., & & NPHIEPS=.TRUE., NTAUOC=.TRUE., NSWH=.TRUE., NMWP=.TRUE., NEMOTAUX=.TRUE., & @@ -145,7 +150,9 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & CALL VARS_4D%F_XLLWS%GET_DEVICE_DATA_RDWR(VARS_4D%XLLWS) !$acc enter data attach(VARS_4D%XLLWS) CALL MIJ%GET_DEVICE_DATA_RDWR() + IF (LHOOK) CALL DR_HOOK('DATA_OFFLOAD',1,ZHOOK_HANDLE_DATA_OFFLOAD) + IF (LHOOK) CALL DR_HOOK('IMPLSCH',0,ZHOOK_HANDLE_IMPLSCH) TIME0=-WAM_USER_CLOCK() !$acc parallel loop gang vector_length(NPROMA_WAM) default(present) copyin(NPROMA_WAM) @@ -175,11 +182,14 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & !$acc end parallel loop TIME_PHYS = TIME_PHYS + (TIME0+WAM_USER_CLOCK())*1.E-06 + IF (LHOOK) CALL DR_HOOK('IMPLSCH',1,ZHOOK_HANDLE_IMPLSCH) + IF (LHOOK) CALL DR_HOOK('DATA_OFFLOAD',0,ZHOOK_HANDLE_DATA_OFFLOAD) CALL VARS_4D%F_FL1%SYNC_HOST_RDONLY(QUEUE=3) CALL FF_NOW%SYNC_HOST_RDONLY(QUEUE=3) CALL WVENVI%SYNC_HOST_RDONLY(QUEUE=4) CALL WAM2NEMO%SYNC_HOST_RDONLY(QUEUE=5) + IF (LHOOK) CALL DR_HOOK('DATA_OFFLOAD',1,ZHOOK_HANDLE_DATA_OFFLOAD) IF (LWNEMOCOU) NEMONTAU = NEMONTAU + 1 diff --git a/src/ecwam/wamodel.F90 b/src/ecwam/wamodel.F90 index 4b6cf93f..aaa89b6f 100644 --- a/src/ecwam/wamodel.F90 +++ b/src/ecwam/wamodel.F90 @@ -74,8 +74,7 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & & IDELWI ,IREST ,IDELRES ,IDELINT , & & CDTBC ,IDELBC , & & IASSI ,MARSTYPE , & - & LLSOURCE ,LANAONLY ,LFRSTFLD ,IREFDATE, LUPDATE_GPU_GLOBALS, & - & MODEL_TIME, IO_TIME + & LLSOURCE ,LANAONLY ,LFRSTFLD ,IREFDATE, LUPDATE_GPU_GLOBALS USE YOWSPEC, ONLY : NBLKS ,NBLKE, MIJ USE YOWTEST , ONLY : IU06 USE YOWTEXT , ONLY : ICPLEN ,CPATH ,CWI ,LRESTARTED @@ -141,9 +140,9 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & INTEGER(KIND=JWIM) :: ICHNK INTEGER(KIND=JWIM) :: JSTPNEMO, IDATE, ITIME INTEGER(KIND=JWIM) :: IU04 - REAL(KIND=JWRB) :: TIME0, TIME1 - REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE, ZHOOK_HANDLE_DATA_OFFLOAD, & + & ZHOOK_HANDLE_ADVECTION_LOOP, ZHOOK_HANDLE_IO REAL(KIND=JWRB), DIMENSION(NPROMA_WAM, NIPRMOUT, NCHNK) :: BOUT CHARACTER(LEN= 2) :: MARSTYPEBAK @@ -203,8 +202,9 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & !* 1. ADVECTION/PHYSICS TIME LOOP. ! ---------------------------- - TIME0=-WAM_USER_CLOCK() + IF (LHOOK) CALL DR_HOOK('ADVECTION_LOOP',0,ZHOOK_HANDLE_ADVECTION_LOOP) #ifdef WAM_GPU + IF (LHOOK) CALL DR_HOOK('DATA_OFFLOAD',0,ZHOOK_HANDLE_DATA_OFFLOAD) CALL WVPRPT_LAND%SYNC_DEVICE_RDONLY(QUEUE=0) CALL VARS_4D%F_FL1%SYNC_DEVICE_RDWR(QUEUE=0) CALL BLK2GLO%SYNC_DEVICE_RDONLY(QUEUE=0) @@ -225,6 +225,7 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & & TAUOCYD=.TRUE., TAUOC=.TRUE., PHIOCD=.TRUE., PHIEPS=.TRUE., PHIAW=.TRUE., QUEUE=2) CALL VARS_4D%F_XLLWS%SYNC_DEVICE_RDWR(QUEUE=2) CALL MIJ%SYNC_DEVICE_RDWR(QUEUE=2) + IF (LHOOK) CALL DR_HOOK('DATA_OFFLOAD',1,ZHOOK_HANDLE_DATA_OFFLOAD) #endif ADVECTION : DO KADV = 1,NADV @@ -347,12 +348,14 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & IF ( NGOUT > 0 .AND. (CDTINTT == CDTPRO .OR. LRST) ) THEN ! OUTPUT POINT SPECTRA (not usually used at ECMWF) #ifdef WAM_GPU + IF (LHOOK) CALL DR_HOOK('DATA_OFFLOAD',0,ZHOOK_HANDLE_DATA_OFFLOAD) CALL WAIT_FOR_ASYNC_QUEUE(QUEUE=3) CALL VARS_4D%F_FL1%GET_HOST_DATA_RDONLY(VARS_4D%FL1) !$acc exit data detach(VARS_4D%FL1) CALL FF_NOW%GET_HOST_DATA_RDONLY(AIRD=.TRUE., WDWAVE=.TRUE., CICOVER=.TRUE., WSWAVE=.TRUE., & & WSTAR=.TRUE., UFRIC=.TRUE., TAUW=.TRUE., TAUWDIR=.TRUE., Z0M=.TRUE., Z0B=.TRUE., & & CHRNCK=.TRUE., CITHICK=.TRUE., USTRA=.TRUE., VSTRA=.TRUE.) + IF (LHOOK) CALL DR_HOOK('DATA_OFFLOAD',1,ZHOOK_HANDLE_DATA_OFFLOAD) #endif CALL OUTWPSP (VARS_4D%FL1, FF_NOW) @@ -407,12 +410,14 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & ENDIF #ifdef WAM_GPU + IF (LHOOK) CALL DR_HOOK('DATA_OFFLOAD',0,ZHOOK_HANDLE_DATA_OFFLOAD) CALL WAIT_FOR_ASYNC_QUEUE(QUEUE=3) CALL VARS_4D%F_FL1%GET_HOST_DATA_RDONLY(VARS_4D%FL1) !$acc exit data detach(VARS_4D%FL1) CALL FF_NOW%GET_HOST_DATA_RDONLY(AIRD=.TRUE., WDWAVE=.TRUE., CICOVER=.TRUE., WSWAVE=.TRUE., & & WSTAR=.TRUE., UFRIC=.TRUE., TAUW=.TRUE., TAUWDIR=.TRUE., Z0M=.TRUE., Z0B=.TRUE., & & CHRNCK=.TRUE., CITHICK=.TRUE., USTRA=.TRUE., VSTRA=.TRUE.) + IF (LHOOK) CALL DR_HOOK('DATA_OFFLOAD',1,ZHOOK_HANDLE_DATA_OFFLOAD) #endif CALL OUTSPEC(VARS_4D%FL1, FF_NOW) @@ -429,6 +434,7 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & ! -------------------------------------- IF ( .NOT.LGRIBOUT .OR. LDWRRE ) THEN #ifdef WAM_GPU + IF (LHOOK) CALL DR_HOOK('DATA_OFFLOAD',0,ZHOOK_HANDLE_DATA_OFFLOAD) CALL WAIT_FOR_ASYNC_QUEUE(QUEUE=3) CALL WAIT_FOR_ASYNC_QUEUE(QUEUE=4) @@ -439,9 +445,10 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & & CHRNCK=.TRUE., CITHICK=.TRUE., USTRA=.TRUE., VSTRA=.TRUE.) CALL WVENVI%GET_HOST_DATA_RDONLY(DEPTH=.TRUE., DELLAM1=.TRUE., COSPHM1=.TRUE., UCUR=.TRUE., VCUR=.TRUE., & & EMAXDPT=.TRUE., IOBND=.TRUE., IODP=.TRUE.) + IF (LHOOK) CALL DR_HOOK('DATA_OFFLOAD',1,ZHOOK_HANDLE_DATA_OFFLOAD) #endif - TIME1=-WAM_USER_CLOCK() + IF (LHOOK) CALL DR_HOOK('IO_TIME',0,ZHOOK_HANDLE_IO) CALL SAVSTRESS(WVENVI, FF_NOW, NBLKS, NBLKE, CDTPRO, CDATEF) WRITE(IU06,*) ' ' WRITE(IU06,*) ' BINARY STRESS FILE DISPOSED AT........ CDTPRO = ', CDTPRO @@ -451,7 +458,7 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & WRITE(IU06,*) ' BINARY WAVE SPECTRA DISPOSED AT........ CDTPRO = ', CDTPRO WRITE(IU06,*) ' ' CALL FLUSH(IU06) - IO_TIME = IO_TIME + (TIME1+WAM_USER_CLOCK())*1.E-06 + IF (LHOOK) CALL DR_HOOK('IO_TIME',1,ZHOOK_HANDLE_IO) ENDIF @@ -521,9 +528,9 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & MARSTYPE='an' ENDIF - TIME1=-WAM_USER_CLOCK() + IF (LHOOK) CALL DR_HOOK('IO_TIME',0,ZHOOK_HANDLE_IO) CALL OUTWINT(BOUT) - IO_TIME = IO_TIME + (TIME1+WAM_USER_CLOCK())*1.E-06 + IF (LHOOK) CALL DR_HOOK('IO_TIME',1,ZHOOK_HANDLE_IO) LLFLUSH = .TRUE. MARSTYPE=MARSTYPEBAK @@ -535,12 +542,14 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & #ifdef WAM_GPU + IF (LHOOK) CALL DR_HOOK('DATA_OFFLOAD',0,ZHOOK_HANDLE_DATA_OFFLOAD) CALL VARS_4D%F_FL1%SYNC_DEVICE_RDWR(QUEUE=0) CALL WVENVI%SYNC_DEVICE_RDWR(DEPTH=.TRUE., DELLAM1=.TRUE., COSPHM1=.TRUE., UCUR=.TRUE., VCUR=.TRUE., & & EMAXDPT=.TRUE., IOBND=.TRUE., IODP=.TRUE., QUEUE=0) CALL FF_NOW%SYNC_DEVICE_RDWR(AIRD=.TRUE., WDWAVE=.TRUE., CICOVER=.TRUE., WSWAVE=.TRUE., & & WSTAR=.TRUE., UFRIC=.TRUE., TAUW=.TRUE., TAUWDIR=.TRUE., Z0M=.TRUE., Z0B=.TRUE., & & CHRNCK=.TRUE., CITHICK=.TRUE., USTRA=.TRUE., VSTRA=.TRUE., QUEUE=1) + IF (LHOOK) CALL DR_HOOK('DATA_OFFLOAD',1,ZHOOK_HANDLE_DATA_OFFLOAD) #endif !* 1.10 FLUSH FDB IF IT HAS BEEN USED AND IT IS NOT AN ANALYSIS (it will be done in *wamassi*) @@ -593,10 +602,12 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & IF (MOD(NEMOWSTEP,NEMOFRCO) == 0) THEN #ifdef WAM_GPU + IF (LHOOK) CALL DR_HOOK('DATA_OFFLOAD',0,ZHOOK_HANDLE_DATA_OFFLOAD) CALL WAIT_FOR_ASYNC_QUEUE(QUEUE=5) CALL WAM2NEMO%GET_HOST_DATA_RDONLY(NEMOUSTOKES=.TRUE., NEMOVSTOKES=.TRUE., NEMOSTRN=.TRUE., & & NPHIEPS=.TRUE., NTAUOC=.TRUE., NSWH=.TRUE., NMWP=.TRUE., NEMOTAUX=.TRUE., & & NEMOTAUY=.TRUE., NEMOWSWAVE=.TRUE., NEMOPHIF=.TRUE.) + IF (LHOOK) CALL DR_HOOK('DATA_OFFLOAD',1,ZHOOK_HANDLE_DATA_OFFLOAD) #endif CALL UPDNEMOFIELDS @@ -612,9 +623,11 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & NEMOCSTEP = NEMOCSTEP + NEMONSTEP #ifdef WAM_GPU + IF (LHOOK) CALL DR_HOOK('DATA_OFFLOAD',0,ZHOOK_HANDLE_DATA_OFFLOAD) CALL WAM2NEMO%SYNC_DEVICE_RDWR(NEMOUSTOKES=.TRUE., NEMOVSTOKES=.TRUE., NEMOSTRN=.TRUE., & & NPHIEPS=.TRUE., NTAUOC=.TRUE., NSWH=.TRUE., NMWP=.TRUE., NEMOTAUX=.TRUE., & & NEMOTAUY=.TRUE., NEMOWSWAVE=.TRUE., NEMOPHIF=.TRUE., QUEUE=2) + IF (LHOOK) CALL DR_HOOK('DATA_OFFLOAD',1,ZHOOK_HANDLE_DATA_OFFLOAD) #endif ENDIF ENDIF @@ -625,6 +638,7 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & ENDDO ADVECTION #ifdef WAM_GPU + IF (LHOOK) CALL DR_HOOK('DATA_OFFLOAD',0,ZHOOK_HANDLE_DATA_OFFLOAD) CALL WVPRPT_LAND%GET_HOST_DATA_RDWR() CALL WVPRPT%GET_HOST_DATA_RDWR() CALL WVENVI%GET_HOST_DATA_RDWR() @@ -646,9 +660,9 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & CALL VARS_4D%DELETE_DEVICE_DATA() CALL MIJ%DELETE_DEVICE_DATA() CALL BLK2GLO%DELETE_DEVICE_DATA() + IF (LHOOK) CALL DR_HOOK('DATA_OFFLOAD',1,ZHOOK_HANDLE_DATA_OFFLOAD) #endif - - MODEL_TIME = MODEL_TIME + (TIME0+WAM_USER_CLOCK())*1.E-06 + IF (LHOOK) CALL DR_HOOK('ADVECTION_LOOP',1,ZHOOK_HANDLE_ADVECTION_LOOP) IF (LHOOK) CALL DR_HOOK('WAMODEL',1,ZHOOK_HANDLE) diff --git a/src/ecwam/yowstat.F90 b/src/ecwam/yowstat.F90 index 4ddfe308..c50083a3 100644 --- a/src/ecwam/yowstat.F90 +++ b/src/ecwam/yowstat.F90 @@ -93,9 +93,6 @@ MODULE YOWSTAT REAL(KIND=JWRB) :: TIME_PROPAG = 0._JWRB REAL(KIND=JWRB) :: TIME_PHYS = 0._JWRB - REAL(KIND=JWRB) :: MODEL_TIME = 0._JWRB - REAL(KIND=JWRB) :: IO_TIME = 0._JWRB - REAL(KIND=JWRB) :: MPI_TIME = 0._JWRB !* VARIABLE. TYPE. PURPOSE. ! --------- ------- -------- From 8851664d6bf040228f21cb6ff153e9920375cb6c Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Mon, 16 Sep 2024 14:51:08 +0000 Subject: [PATCH 14/17] Don't explicitly map MPI rank to GPU --- README.md | 2 -- src/ecwam/CMakeLists.txt | 1 - src/ecwam/wam_init_gpu_mod.F90 | 30 ------------------------------ src/ecwam/wvwaminit.F90 | 5 ----- 4 files changed, 38 deletions(-) delete mode 100644 src/ecwam/wam_init_gpu_mod.F90 diff --git a/README.md b/README.md index 6c56b0fd..8ee1e9c8 100644 --- a/README.md +++ b/README.md @@ -241,8 +241,6 @@ Running ------- No extra run-time options are needed to run the GPU enabled ecWam. Please note that this means that if ecWam is built using the `--with-loki` and `--with-acc` bundle arguments, it will necessarily be offloaded for GPU execution. -For multi-GPU runs, the number of GPUs maps to the number of MPI ranks. Thus multiple GPUs can be requested by -launching with multiple MPI ranks. The mapping of MPI ranks to GPUs assumes at most 4 GPUs per host node. Environment variables --------------------- diff --git a/src/ecwam/CMakeLists.txt b/src/ecwam/CMakeLists.txt index 0df935b9..805a76ae 100644 --- a/src/ecwam/CMakeLists.txt +++ b/src/ecwam/CMakeLists.txt @@ -269,7 +269,6 @@ list( APPEND ecwam_srcs w_mode_st.F90 w_pdf.F90 w_pmax.F90 - wam_init_gpu_mod.F90 wam_multio_mod.F90 wam_nproma.F90 wam_sorti.F90 diff --git a/src/ecwam/wam_init_gpu_mod.F90 b/src/ecwam/wam_init_gpu_mod.F90 deleted file mode 100644 index 2c16e061..00000000 --- a/src/ecwam/wam_init_gpu_mod.F90 +++ /dev/null @@ -1,30 +0,0 @@ -! (C) Copyright 1989- ECMWF. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - - MODULE WAM_INIT_GPU_MOD - CONTAINS - SUBROUTINE WAM_INIT_GPU(IRANK) -#ifdef _OPENACC - USE OPENACC -#endif - USE PARKIND_WAVE, ONLY : JWIM - IMPLICIT NONE - - INTEGER(KIND=JWIM), INTENT(IN) :: IRANK - INTEGER :: DEVTYPE, DEVNUM, DEV - - -#ifdef _OPENACC - DEVTYPE = ACC_GET_DEVICE_TYPE() - DEVNUM = ACC_GET_NUM_DEVICES(DEVTYPE) - DEV = MOD(IRANK-1, DEVNUM) - CALL ACC_SET_DEVICE_NUM(DEV, DEVTYPE) -#endif - END SUBROUTINE WAM_INIT_GPU - END MODULE WAM_INIT_GPU_MOD diff --git a/src/ecwam/wvwaminit.F90 b/src/ecwam/wvwaminit.F90 index 811922e6..1b31d88b 100644 --- a/src/ecwam/wvwaminit.F90 +++ b/src/ecwam/wvwaminit.F90 @@ -47,7 +47,6 @@ SUBROUTINE WVWAMINIT (LLCOUPLED, IULOG, LLRNL, & USE MPL_MODULE, ONLY : MPL_MYRANK, MPL_NPROC USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK - USE WAM_INIT_GPU_MOD, ONLY : WAM_INIT_GPU ! ---------------------------------------------------------------------- IMPLICIT NONE @@ -89,10 +88,6 @@ SUBROUTINE WVWAMINIT (LLCOUPLED, IULOG, LLRNL, & IRANK = MPL_MYRANK() NPROC = MPL_NPROC() -#if defined(WAM_GPU) - CALL WAM_INIT_GPU(IRANK) -#endif - ! STANDARD OUTPUT UNIT ! -------------------- From 8ef50b13d625ea96a238be0de0d611749082ff5e Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Mon, 7 Oct 2024 16:37:36 +0000 Subject: [PATCH 15/17] ACC: use data regions rather than hard-coded parallel gang loops --- src/ecwam/cireduce_loki_gpu.F90 | 6 ++++-- src/ecwam/outbs_loki_gpu.F90 | 6 ++++-- src/ecwam/wamintgr_loki_gpu.F90 | 6 ++++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/ecwam/cireduce_loki_gpu.F90 b/src/ecwam/cireduce_loki_gpu.F90 index cfcfc002..755588a3 100644 --- a/src/ecwam/cireduce_loki_gpu.F90 +++ b/src/ecwam/cireduce_loki_gpu.F90 @@ -99,12 +99,14 @@ SUBROUTINE CIREDUCE_LOKI_GPU (WVPRPT, FF_NOW) ENDIF CALL GSTATS(1493,0) ! DETERMINE THE WAVE ATTENUATION FACTOR -!$acc parallel loop gang present(FF_NOW, WVPRPT) vector_length(NPROMA_WAM) +!$acc data present(FF_NOW, WVPRPT) + DO ICHNK = 1, NCHNK CALL CIWAF(1, NPROMA_WAM, WVPRPT%CGROUP(:,:,ICHNK), FF_NOW%CICOVER(:,ICHNK), & & FF_NOW%CITHICK(:,ICHNK), WVPRPT%CIWA(:,:,ICHNK)) ENDDO -!$acc end parallel loop + +!$acc end data CALL GSTATS(1493,1) ENDIF diff --git a/src/ecwam/outbs_loki_gpu.F90 b/src/ecwam/outbs_loki_gpu.F90 index f1135e59..9c4720b5 100644 --- a/src/ecwam/outbs_loki_gpu.F90 +++ b/src/ecwam/outbs_loki_gpu.F90 @@ -95,7 +95,8 @@ SUBROUTINE OUTBS_LOKI_GPU (MIJ, FL1, XLLWS, & !$loki update_device CALL GSTATS(1502,0) -!$acc parallel loop gang default(present) copyin(NPROMA_WAM) copyout(BOUT) vector_length(NPROMA_WAM) +!$acc data present(MIJ,WVPRPT,WVENVI,INTFLDS,FF_NOW,NEMO2WAM) copyout(BOUT) + DO ICHNK = 1, NCHNK CALL OUTBLOCK(1, NPROMA_WAM, MIJ(:,ICHNK), & & FL1(:,:,:,ICHNK), XLLWS(:,:,:,ICHNK), & @@ -117,7 +118,8 @@ SUBROUTINE OUTBS_LOKI_GPU (MIJ, FL1, XLLWS, & & NEMO2WAM%NEMOVCUR(:, ICHNK), & & BOUT(:,:,ICHNK)) ENDDO -!$acc end parallel loop + +!$acc end data CALL GSTATS(1502,1) ! PRINT OUT NORMS diff --git a/src/ecwam/wamintgr_loki_gpu.F90 b/src/ecwam/wamintgr_loki_gpu.F90 index 0695abef..705e10e8 100644 --- a/src/ecwam/wamintgr_loki_gpu.F90 +++ b/src/ecwam/wamintgr_loki_gpu.F90 @@ -155,7 +155,8 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & IF (LHOOK) CALL DR_HOOK('IMPLSCH',0,ZHOOK_HANDLE_IMPLSCH) TIME0=-WAM_USER_CLOCK() -!$acc parallel loop gang vector_length(NPROMA_WAM) default(present) copyin(NPROMA_WAM) +!$acc data present(VARS_4D,WVPRPT,WVENVI,FF_NOW,WAM2NEMO,INTFLDS,MIJ) + DO ICHNK=1,NCHNK CALL IMPLSCH (1, NPROMA_WAM, VARS_4D%FL1(:,:,:,ICHNK), & & WVPRPT%WAVNUM(:,:,ICHNK), WVPRPT%CGROUP(:,:,ICHNK), WVPRPT%CIWA(:,:,ICHNK), & @@ -179,7 +180,8 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & & INTFLDS%PHIEPS(:,ICHNK), INTFLDS%PHIAW(:,ICHNK), & & MIJ%PTR(:,ICHNK), VARS_4D%XLLWS(:,:,:,ICHNK) ) END DO -!$acc end parallel loop + +!$acc end data TIME_PHYS = TIME_PHYS + (TIME0+WAM_USER_CLOCK())*1.E-06 IF (LHOOK) CALL DR_HOOK('IMPLSCH',1,ZHOOK_HANDLE_IMPLSCH) From 2fad889c286eeee073b6a8316f979eee49b0d455 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Thu, 10 Oct 2024 17:09:51 +0000 Subject: [PATCH 16/17] OUTBLOCK: test GPU ready version for all 87 output parameters --- src/ecwam/outblock.F90 | 150 ++++++++++++++++++++--------------------- 1 file changed, 75 insertions(+), 75 deletions(-) diff --git a/src/ecwam/outblock.F90 b/src/ecwam/outblock.F90 index 73403901..2454a46b 100644 --- a/src/ecwam/outblock.F90 +++ b/src/ecwam/outblock.F90 @@ -409,150 +409,150 @@ SUBROUTINE OUTBLOCK (KIJS, KIJL, MIJ, & ENDIF DO ITR=1,NTRAIN - IF (IPFGTBL(42) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(42))=4._JWRB*SQRT(MAX(EMTRAIN(KIJS:KIJL,ITR),0._JWRB)) + IF (IPFGTBL(42 + (ITR-1)*3) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(42 + (ITR-1)*3))=4._JWRB*SQRT(MAX(EMTRAIN(KIJS:KIJL,ITR),0._JWRB)) ENDIF - IF (IPFGTBL(43) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(43))=MOD(DEG*THTRAIN(KIJS:KIJL,ITR)+180._JWRB,360._JWRB) + IF (IPFGTBL(43 + (ITR-1)*3) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(43 + (ITR-1)*3))=MOD(DEG*THTRAIN(KIJS:KIJL,ITR)+180._JWRB,360._JWRB) ENDIF - IF (IPFGTBL(44) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(44))=PMTRAIN(KIJS:KIJL,ITR) + IF (IPFGTBL(44 + (ITR-1)*3) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(44 + (ITR-1)*3))=PMTRAIN(KIJS:KIJL,ITR) ENDIF ENDDO - IF (IPFGTBL(45) /= 0) THEN + IF (IPFGTBL(42 + 3*NTRAIN) /= 0) THEN IF (LWNEMOCOUSTRN) THEN - BOUT(KIJS:KIJL,ITOBOUT(45))=STRNMS(KIJS:KIJL) + BOUT(KIJS:KIJL,ITOBOUT(42 + 3*NTRAIN))=STRNMS(KIJS:KIJL) ELSE - CALL CIMSSTRN (KIJS, KIJL, FL1, WAVNUM, DEPTH, CITHICK, BOUT(:,ITOBOUT(45))) + CALL CIMSSTRN (KIJS, KIJL, FL1, WAVNUM, DEPTH, CITHICK, BOUT(:,ITOBOUT(42 + 3*NTRAIN))) ENDIF ENDIF - IF (IPFGTBL(46) /= 0) THEN + IF (IPFGTBL(43 + 3*NTRAIN) /= 0) THEN CALL SE10MEAN (KIJS, KIJL, FL2ND, FLD1) - BOUT(KIJS:KIJL,ITOBOUT(46))=4._JWRB*SQRT(MAX(FLD1(KIJS:KIJL),0._JWRB)) + BOUT(KIJS:KIJL,ITOBOUT(43 + 3*NTRAIN))=4._JWRB*SQRT(MAX(FLD1(KIJS:KIJL),0._JWRB)) ENDIF - IF (IPFGTBL(47) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(47))=AIRD(KIJS:KIJL) + IF (IPFGTBL(44 + 3*NTRAIN) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(44 + 3*NTRAIN))=AIRD(KIJS:KIJL) ENDIF - IF (IPFGTBL(48) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(48))=WSTAR(KIJS:KIJL) + IF (IPFGTBL(45 + 3*NTRAIN) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(45 + 3*NTRAIN))=WSTAR(KIJS:KIJL) ENDIF - IF (IPFGTBL(49) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(49))=CICOVER(KIJS:KIJL) + IF (IPFGTBL(46 + 3*NTRAIN) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(46 + 3*NTRAIN))=CICOVER(KIJS:KIJL) ENDIF - IF (IPFGTBL(50) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(50))=CITHICK(KIJS:KIJL) + IF (IPFGTBL(47 + 3*NTRAIN) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(47 + 3*NTRAIN))=CITHICK(KIJS:KIJL) ENDIF - IF (IPFGTBL(51) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(51))=C3(KIJS:KIJL) + IF (IPFGTBL(48 + 3*NTRAIN) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(48 + 3*NTRAIN))=C3(KIJS:KIJL) ENDIF - IF (IPFGTBL(52) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(52))=NEMOSST(KIJS:KIJL) + IF (IPFGTBL(49 + 3*NTRAIN) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(49 + 3*NTRAIN))=NEMOSST(KIJS:KIJL) ENDIF - IF (IPFGTBL(53) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(53))=NEMOCICOVER(KIJS:KIJL) + IF (IPFGTBL(50 + 3*NTRAIN) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(50 + 3*NTRAIN))=NEMOCICOVER(KIJS:KIJL) ENDIF - IF (IPFGTBL(54) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(54))=NEMOCITHICK(KIJS:KIJL) + IF (IPFGTBL(51 + 3*NTRAIN) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(51 + 3*NTRAIN))=NEMOCITHICK(KIJS:KIJL) ENDIF - IF (IPFGTBL(55) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(55))=NEMOUCUR(KIJS:KIJL) + IF (IPFGTBL(52 + 3*NTRAIN) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(52 + 3*NTRAIN))=NEMOUCUR(KIJS:KIJL) ENDIF - IF (IPFGTBL(56) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(56))=NEMOVCUR(KIJS:KIJL) + IF (IPFGTBL(53 + 3*NTRAIN) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(53 + 3*NTRAIN))=NEMOVCUR(KIJS:KIJL) ENDIF - IF (IPFGTBL(57) /= 0 .OR. IPFGTBL(58) /= 0) THEN + IF (IPFGTBL(54 + 3*NTRAIN) /= 0 .OR. IPFGTBL(55 + 3*NTRAIN) /= 0) THEN CALL WEFLUX (KIJS, KIJL, FL1, CGROUP, & & NFRE, NANG, DFIM, DELTH, & & COSTH, SINTH, & & FLD1, FLD2) ENDIF - IF (IPFGTBL(57) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(57))=FLD1(KIJS:KIJL) + IF (IPFGTBL(54 + 3*NTRAIN) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(54 + 3*NTRAIN))=FLD1(KIJS:KIJL) ENDIF - IF (IPFGTBL(58) /= 0) THEN + IF (IPFGTBL(55 + 3*NTRAIN) /= 0) THEN ! CONVERT DIRECTIONS TO DEGREES AND METEOROLOGICAL CONVENTION - BOUT(KIJS:KIJL,ITOBOUT(58))=MOD(DEG*FLD2(KIJS:KIJL)+180._JWRB,360._JWRB) + BOUT(KIJS:KIJL,ITOBOUT(55 + 3*NTRAIN))=MOD(DEG*FLD2(KIJS:KIJL)+180._JWRB,360._JWRB) ENDIF DO IH=1,NTEWH - IF (IPFGTBL(59) /= 0) THEN + IF (IPFGTBL(55 + 3*NTRAIN + IH) /= 0) THEN !$loki inline - CALL SEBTMEAN (KIJS, KIJL, FL2ND, TEWH(IH-1), TEWH(IH), BOUT(:,ITOBOUT(59))) + CALL SEBTMEAN (KIJS, KIJL, FL2ND, TEWH(IH-1), TEWH(IH), BOUT(:,ITOBOUT(55 + 3*NTRAIN + IH))) ! SIGNIFICANT WAVE HEIGHT CONVERSION - BOUT(KIJS:KIJL,ITOBOUT(59))=4._JWRB*SQRT(MAX(BOUT(KIJS:KIJL,ITOBOUT(59)),0._JWRB)) + BOUT(KIJS:KIJL,ITOBOUT(55 + 3*NTRAIN + IH))=4._JWRB*SQRT(MAX(BOUT(KIJS:KIJL,ITOBOUT(55 + 3*NTRAIN + IH)),0._JWRB)) ENDIF ENDDO - IF (IPFGTBL(60) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(60))=ETA_M(KIJS:KIJL) + IF (IPFGTBL(56 + 3*NTRAIN + NTEWH) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(56 + 3*NTRAIN + NTEWH))=ETA_M(KIJS:KIJL) ENDIF - IF (IPFGTBL(61) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(61))=R(KIJS:KIJL) + IF (IPFGTBL(57 + 3*NTRAIN + NTEWH) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(57 + 3*NTRAIN + NTEWH))=R(KIJS:KIJL) ENDIF - IF (IPFGTBL(62) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(62))=XNSLC(KIJS:KIJL) + IF (IPFGTBL(58 + 3*NTRAIN + NTEWH) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(58 + 3*NTRAIN + NTEWH))=XNSLC(KIJS:KIJL) ENDIF - IF (IPFGTBL(63) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(63))=TAUXD(KIJS:KIJL) + IF (IPFGTBL(59 + 3*NTRAIN + NTEWH) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(59 + 3*NTRAIN + NTEWH))=TAUXD(KIJS:KIJL) ENDIF - IF (IPFGTBL(64) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(64))=TAUYD(KIJS:KIJL) + IF (IPFGTBL(60 + 3*NTRAIN + NTEWH) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(60 + 3*NTRAIN + NTEWH))=TAUYD(KIJS:KIJL) ENDIF - IF (IPFGTBL(65) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(65))=TAUOCXD(KIJS:KIJL) + IF (IPFGTBL(61 + 3*NTRAIN + NTEWH) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(61 + 3*NTRAIN + NTEWH))=TAUOCXD(KIJS:KIJL) ENDIF - IF (IPFGTBL(66) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(66))=TAUOCYD(KIJS:KIJL) + IF (IPFGTBL(62 + 3*NTRAIN + NTEWH) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(62 + 3*NTRAIN + NTEWH))=TAUOCYD(KIJS:KIJL) ENDIF - IF (IPFGTBL(67) /= 0) THEN + IF (IPFGTBL(63 + 3*NTRAIN + NTEWH) /= 0) THEN ! !!! make the energy flux positive - BOUT(KIJS:KIJL,ITOBOUT(67))=MAX(-PHIOCD(KIJS:KIJL),0.0_JWRB) + BOUT(KIJS:KIJL,ITOBOUT(63 + 3*NTRAIN + NTEWH))=MAX(-PHIOCD(KIJS:KIJL),0.0_JWRB) ENDIF !! alternative ways to determine wave height extremes - IF (IPFGTBL(67) /= 0 .OR. IPFGTBL(68) /= 0 .OR. & -& IPFGTBL(69) /= 0 .OR. IPFGTBL(70) /= 0 ) THEN + IF (IPFGTBL(63 + 3*NTRAIN + NTEWH) /= 0 .OR. IPFGTBL(64 + 3*NTRAIN + NTEWH) /= 0 .OR. & +& IPFGTBL(65 + 3*NTRAIN + NTEWH) /= 0 .OR. IPFGTBL(66 + 3*NTRAIN + NTEWH) /= 0 ) THEN CALL W_MAXH (KIJS, KIJL, FL1, DEPTH, WAVNUM, & & CMAX_F, HMAX_N, CMAX_ST, HMAX_ST, PHIST) ENDIF - IF (IPFGTBL(68) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(68))=CMAX_F(KIJS:KIJL) + IF (IPFGTBL(64 + 3*NTRAIN + NTEWH) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(64 + 3*NTRAIN + NTEWH))=CMAX_F(KIJS:KIJL) ENDIF - IF (IPFGTBL(69) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(69))=HMAX_N(KIJS:KIJL) + IF (IPFGTBL(65 + 3*NTRAIN + NTEWH) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(65 + 3*NTRAIN + NTEWH))=HMAX_N(KIJS:KIJL) ENDIF - IF (IPFGTBL(70) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(70))=CMAX_ST(KIJS:KIJL) + IF (IPFGTBL(66 + 3*NTRAIN + NTEWH) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(66 + 3*NTRAIN + NTEWH))=CMAX_ST(KIJS:KIJL) ENDIF - IF (IPFGTBL(71) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(71))=HMAX_ST(KIJS:KIJL) + IF (IPFGTBL(67 + 3*NTRAIN + NTEWH) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(67 + 3*NTRAIN + NTEWH))=HMAX_ST(KIJS:KIJL) ENDIF !! @@ -560,25 +560,25 @@ SUBROUTINE OUTBLOCK (KIJS, KIJL, MIJ, & ! COMPUTE OUTPUT EXTRA FIELDS ! add necessary code to compute the extra output fields !!!for testing - IF (IPFGTBL(72) /= 0) THEN - CALL CTCOR (KIJS, KIJL, FL1, BOUT(:,ITOBOUT(72))) + IF (IPFGTBL(68 + 3*NTRAIN + NTEWH) /= 0) THEN + CALL CTCOR (KIJS, KIJL, FL1, BOUT(:,ITOBOUT(68 + 3*NTRAIN + NTEWH))) ENDIF - IF (IPFGTBL(73) /= 0) THEN + IF (IPFGTBL(69 + 3*NTRAIN + NTEWH) /= 0) THEN XMODEL_CUTOFF=(ZPI*FR(NFRE))**2/G - CALL MEANSQS (XMODEL_CUTOFF, KIJS, KIJL, FL1, WAVNUM, UFRIC, COSWDIF, BOUT(:,ITOBOUT(73))) + CALL MEANSQS (XMODEL_CUTOFF, KIJS, KIJL, FL1, WAVNUM, UFRIC, COSWDIF, BOUT(:,ITOBOUT(69 + 3*NTRAIN + NTEWH))) ENDIF - IF (IPFGTBL(74) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(74))=0._JWRB + IF (IPFGTBL(70 + 3*NTRAIN + NTEWH) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(70 + 3*NTRAIN + NTEWH))=0._JWRB ENDIF - IF (IPFGTBL(75) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(75))=0._JWRB + IF (IPFGTBL(71 + 3*NTRAIN + NTEWH) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(71 + 3*NTRAIN + NTEWH))=0._JWRB ENDIF - IF (IPFGTBL(76) /= 0) THEN - BOUT(KIJS:KIJL,ITOBOUT(76))=0._JWRB + IF (IPFGTBL(72 + 3*NTRAIN + NTEWH) /= 0) THEN + BOUT(KIJS:KIJL,ITOBOUT(72 + 3*NTRAIN + NTEWH))=0._JWRB ENDIF From b4d300214eb8f2ca5e889e41119388ca4c0b7609 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Fri, 11 Oct 2024 08:09:42 +0000 Subject: [PATCH 17/17] Rebase cleanup --- src/ecwam/wamodel.F90 | 1 - src/ecwam/wdirspread.F90 | 1 - 2 files changed, 2 deletions(-) diff --git a/src/ecwam/wamodel.F90 b/src/ecwam/wamodel.F90 index aaa89b6f..f2967db5 100644 --- a/src/ecwam/wamodel.F90 +++ b/src/ecwam/wamodel.F90 @@ -112,7 +112,6 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & #include "updnemofields.intfb.h" #include "updnemostress.intfb.h" #include "writsta.intfb.h" -#include "wam_user_clock.intfb.h" #ifdef WAM_GPU #include "outbs_loki_gpu.intfb.h" diff --git a/src/ecwam/wdirspread.F90 b/src/ecwam/wdirspread.F90 index 4e5529dd..e80d71ce 100644 --- a/src/ecwam/wdirspread.F90 +++ b/src/ecwam/wdirspread.F90 @@ -55,7 +55,6 @@ SUBROUTINE WDIRSPREAD (KIJS, KIJL, F, EMEAN, LLPEAKF, WDIRSPRD) USE PARKIND_WAVE, ONLY : JWIM, JWRB, JWRU USE YOWFRED , ONLY : FR ,DFIM ,DELTH ,WETAIL - USE YOWFRED , ONLY : DELTH ,TH ,COSTH ,SINTH !... needed for Loki USE YOWPARAM , ONLY : NANG ,NFRE USE YOWPCONS , ONLY : EPSMIN