From 185c3453bccc15b0d094b71239cf433dafa42460 Mon Sep 17 00:00:00 2001 From: Zbigniew Piotrowski Date: Thu, 2 Nov 2023 17:21:45 +0100 Subject: [PATCH] Cleanup done, code compiles and executes, norms seem the same in the original and modified version, now time to put the headers in the stub modules to enable symbol modification in the trans(i) libraries with the driver modulename prefix --- src/programs/ectrans-benchmark-data_mod.F90 | 134 +---- src/programs/ectrans-benchmark-driver.F90 | 513 +------------------- src/programs/ectrans-benchmark-program.F90 | 497 ++----------------- src/programs/ectrans-benchmark.F90 | 2 +- 4 files changed, 34 insertions(+), 1112 deletions(-) diff --git a/src/programs/ectrans-benchmark-data_mod.F90 b/src/programs/ectrans-benchmark-data_mod.F90 index 33b0e2f5..473e8796 100644 --- a/src/programs/ectrans-benchmark-data_mod.F90 +++ b/src/programs/ectrans-benchmark-data_mod.F90 @@ -10,53 +10,17 @@ module transform_driver_data_mod use parkind1, only: jpim, jprb, jprd -use yomgstats, only: jpmaxstat - - implicit none - -integer(kind=jpim) :: istack, getstackusage real(kind=jprb), dimension(1) :: zmaxerr(5), zerr(5) real(kind=jprb) :: zmaxerrg - -! Output unit numbers -integer(kind=jpim), parameter :: nerr = 0 ! Unit number for STDERR -integer(kind=jpim), parameter :: nout = 6 ! Unit number for STDOUT -integer(kind=jpim), parameter :: noutdump = 7 ! Unit number for field output - -! Default parameters -integer(kind=jpim) :: nsmax = 79 ! Spectral truncation -integer(kind=jpim) :: iters = 10 ! Number of iterations for transform test -integer(kind=jpim) :: nfld = 1 ! Number of scalar fields -integer(kind=jpim) :: nlev = 1 ! Number of vertical levels - -integer(kind=jpim) :: nflevg -integer(kind=jpim) :: ndgl ! Number of latitudes -integer(kind=jpim) :: nspec2 -integer(kind=jpim) :: ngptot -integer(kind=jpim) :: ngptotg -integer(kind=jpim) :: ifld -integer(kind=jpim) :: jroc -integer(kind=jpim) :: jb -integer(kind=jpim) :: nspec2g -integer(kind=jpim) :: i -integer(kind=jpim) :: ja -integer(kind=jpim) :: ib -integer(kind=jpim) :: jprtrv - -integer(kind=jpim), allocatable :: nloen(:), nprcids(:) -integer(kind=jpim) :: myproc, jj -integer :: jstep - -real(kind=jprd) :: ztinit, ztloop, timef, ztstepmax, ztstepmin, ztstepavg, ztstepmed +real(kind=jprd) :: ztstepmax, ztstepmin, ztstepavg, ztstepmed real(kind=jprd) :: ztstepmax1, ztstepmin1, ztstepavg1, ztstepmed1 real(kind=jprd) :: ztstepmax2, ztstepmin2, ztstepavg2, ztstepmed2 real(kind=jprd), allocatable :: ztstep(:), ztstep1(:), ztstep2(:) real(kind=jprb), allocatable :: znormsp(:), znormsp1(:), znormdiv(:), znormdiv1(:) real(kind=jprb), allocatable :: znormvor(:), znormvor1(:), znormt(:), znormt1(:) -real(kind=jprd) :: zaveave(0:jpmaxstat) ! Grid-point space data structures real(kind=jprb), allocatable, target :: zgmv (:,:,:,:) ! Multilevel fields at t and t-dt @@ -72,102 +36,6 @@ module transform_driver_data_mod real(kind=jprb), pointer :: zspsc3a(:,:,:) => null() real(kind=jprb), allocatable :: zspsc2(:,:) -logical :: lstack = .false. ! Output stack info -logical :: luserpnm = .false. -logical :: lkeeprpnm = .false. -logical :: luseflt = .false. ! Use fast legendre transforms -logical :: ltrace_stats = .false. -logical :: lstats_omp = .false. -logical :: lstats_comms = .false. -logical :: lstats_mpl = .false. -logical :: lstats = .true. ! gstats statistics -logical :: lbarrier_stats = .false. -logical :: lbarrier_stats2 = .false. -logical :: ldetailed_stats = .false. -logical :: lstats_alloc = .false. -logical :: lsyncstats = .false. -logical :: lstatscpu = .false. -logical :: lstats_mem = .false. -logical :: lxml_stats = .false. -logical :: lfftw = .true. ! Use FFTW for Fourier transforms -logical :: lvordiv = .false. -logical :: lscders = .false. -logical :: luvders = .false. -logical :: lprint_norms = .false. ! Calculate and print spectral norms -logical :: lmeminfo = .false. ! Show information from FIAT routine ec_meminfo at the end - -integer(kind=jpim) :: nstats_mem = 0 -integer(kind=jpim) :: ntrace_stats = 0 -integer(kind=jpim) :: nprnt_stats = 1 - -! The multiplier of the machine epsilon used as a tolerance for correctness checking -! ncheck = 0 (the default) means that correctness checking is disabled -integer(kind=jpim) :: ncheck = 0 - -logical :: lmpoff = .false. ! Message passing switch - -! Verbosity level (0 or 1) -integer :: verbosity = 0 - -real(kind=jprb) :: zra = 6371229._jprb - -integer(kind=jpim) :: nmax_resol = 37 ! Max number of resolutions -integer(kind=jpim) :: npromatr = 0 ! nproma for trans lib -integer(kind=jpim) :: ncombflen = 1800000 ! Size of comm buffer - -integer(kind=jpim) :: nproc ! Number of procs -integer(kind=jpim) :: nthread -integer(kind=jpim) :: nprgpns ! Grid-point decomp -integer(kind=jpim) :: nprgpew ! Grid-point decomp -integer(kind=jpim) :: nprtrv = 0 ! Spectral decomp -integer(kind=jpim) :: nprtrw = 0 ! Spectral decomp -integer(kind=jpim) :: nspecresmin = 80 ! Minimum spectral resolution, for controlling nprtrw -integer(kind=jpim) :: mysetv -integer(kind=jpim) :: mysetw -integer(kind=jpim) :: mp_type = 2 ! Message passing type -integer(kind=jpim) :: mbx_size = 150000000 ! Mailbox size - -integer(kind=jpim) :: nflevl - -! sumpini -integer(kind=jpim) :: isqr -logical :: lsync_trans = .true. ! Activate barrier sync -logical :: leq_regions = .true. ! Eq regions flag - - -integer(kind=jpim) :: nproma = 0 -integer(kind=jpim) :: ngpblks -! locals -integer(kind=jpim) :: iprtrv -integer(kind=jpim) :: iprtrw -integer(kind=jpim) :: iprused, ilevpp, irest, ilev, jlev - -integer(kind=jpim) :: ndimgmv = 0 ! Third dim. of gmv "(nproma,nflevg,ndimgmv,ngpblks)" -integer(kind=jpim) :: ndimgmvs = 0 ! Second dim. gmvs "(nproma,ndimgmvs,ngpblks)" - -integer(kind=jpim) :: jbegin_uv = 0 -integer(kind=jpim) :: jend_uv = 0 -integer(kind=jpim) :: jbegin_sc = 0 -integer(kind=jpim) :: jend_sc = 0 -integer(kind=jpim) :: jbegin_scder_NS = 0 -integer(kind=jpim) :: jend_scder_NS = 0 -integer(kind=jpim) :: jbegin_scder_EW = 0 -integer(kind=jpim) :: jend_scder_EW = 0 -integer(kind=jpim) :: jbegin_uder_EW = 0 -integer(kind=jpim) :: jend_uder_EW = 0 -integer(kind=jpim) :: jbegin_vder_EW = 0 -integer(kind=jpim) :: jend_vder_EW = 0 - -logical :: ldump_values = .false. - -integer, external :: ec_mpirank -logical :: luse_mpi = .true. - -character(len=16) :: cgrid = '' - -integer(kind=jpim) :: ierr - - end module transform_driver_data_mod !=================================================================================================== diff --git a/src/programs/ectrans-benchmark-driver.F90 b/src/programs/ectrans-benchmark-driver.F90 index dc8f64c7..52d22a66 100644 --- a/src/programs/ectrans-benchmark-driver.F90 +++ b/src/programs/ectrans-benchmark-driver.F90 @@ -8,41 +8,8 @@ ! MODULE transform_driver - -! -! Spectral transform test -! -! This test performs spectral to real and real to spectral transforms repeated in -! timed loop. -! -! 1) One "surface" field is always transformed: -! zspsc2(1,1:nspec2) <-> zgmvs(1:nproma,1:1,1:ngbplk) -! -! 2) A Multiple "3d" fields are transformed and can be disabled with "--nfld 0" -! -! zspsc3a(1:nlev,1:nspec2,1:nfld) <-> zgp3a(1:nproma,1:nlev,1:nfld,1:ngpblk) -! -! 3) Optionally a "3d" vorticity/divergence field is transformed to uv (wind) and -! can be enabled with "--vordiv" -! -! zspvor(1:nlev,1:nspec2) / zspdiv(1:nlev,1:nspec2) <-> zgpuv(1:nproma,1:nlev,1:2,1:ngpblk) -! -! 4) Optionally scalar derivatives can be computed for the fields described in 1) and 2) -! This must be enabled with "--scders" -! -! 5) Optionally uv East-West derivate can be computed from vorticity/divergence. -! This must be enabled with "--vordiv --uvders" -! -! -! Authors : George Mozdzynski -! Willem Deconinck -! Ioan Hadade -! Sam Hatfield -! - use parkind1, only: jpim, jprb, jprd use oml_mod ,only : oml_max_threads -use mpl_module use yomgstats, only: jpmaxstat use yomhook, only : dr_hook_init @@ -60,173 +27,6 @@ MODULE transform_driver !!=================================================================================================== CONTAINS -! -!luse_mpi = detect_mpirun() -! -!! Setup -!call get_command_line_arguments(nsmax, cgrid, iters, nfld, nlev, lvordiv, lscders, luvders, & -! & luseflt, nproma, verbosity, ldump_values, lprint_norms, lmeminfo, nprtrv, nprtrw, ncheck) -!if (cgrid == '') cgrid = cubic_octahedral_gaussian_grid(nsmax) -!call parse_grid(cgrid, ndgl, nloen) -!nflevg = nlev -! -!!=================================================================================================== -! -!if (luse_mpi) then -! call mpl_init(ldinfo=(verbosity>=1)) -! nproc = mpl_nproc() -! myproc = mpl_myrank() -!else -! nproc = 1 -! myproc = 1 -! mpl_comm = -1 -!endif -!nthread = oml_max_threads() -! -!call dr_hook_init() -! -!!=================================================================================================== -! -!if( lstats ) call gstats(0,0) -!ztinit = timef() -! -!! only output to stdout on pe 1 -!if (nproc > 1) then -! if (myproc /= 1) then -! open(unit=nout, file='/dev/null') -! endif -!endif -! -!if (ldetailed_stats) then -! lstats_omp = .true. -! lstats_comms = .true. -! lstats_mpl = .true. -! lstatscpu = .true. -! nprnt_stats = nproc -!! lstats_mem = .true. -!! lstats_alloc = .true. -!endif -! -!!=================================================================================================== -! -!allocate(nprcids(nproc)) -!do jj = 1, nproc -! nprcids(jj) = jj -!enddo -! -!if (nproc <= 1) then -! lmpoff = .true. -!endif -! -!! Compute nprgpns and nprgpew -!! This version selects most square-like distribution -!! These will change if leq_regions=.true. -!if (nproc == 0) nproc = 1 -!isqr = int(sqrt(real(nproc,jprb))) -!do ja = isqr, nproc -! ib = nproc/ja -! if (ja*ib == nproc) then -! nprgpns = max(ja,ib) -! nprgpew = min(ja,ib) -! exit -! endif -!enddo -! -!! From sumpini, although this should be specified in namelist -!if (nspecresmin == 0) nspecresmin = nproc -! -!! Compute nprtrv and nprtrw if not provided on the command line -!if (nprtrv > 0 .or. nprtrw > 0) then -! if (nprtrv == 0) nprtrv = nproc/nprtrw -! if (nprtrw == 0) nprtrw = nproc/nprtrv -! if (nprtrw*nprtrv /= nproc) call abor1('transform_test:nprtrw*nprtrv /= nproc') -! if (nprtrw > nspecresmin) call abor1('transform_test:nprtrw > nspecresmin') -!else -! do jprtrv = 4, nproc -! nprtrv = jprtrv -! nprtrw = nproc/nprtrv -! if (nprtrv*nprtrw /= nproc) cycle -! if (nprtrv > nprtrw) exit -! if (nprtrw > nspecresmin) cycle -! if (nprtrw <= nspecresmin/(2*oml_max_threads())) exit -! enddo -! ! Go for approx square partition for backup -! if (nprtrv*nprtrw /= nproc .or. nprtrw > nspecresmin .or. nprtrv > nprtrw) then -! isqr = int(sqrt(real(nproc,jprb))) -! do ja = isqr, nproc -! ib = nproc/ja -! if (ja*ib == nproc) then -! nprtrw = max(ja, ib) -! nprtrv = min(ja, ib) -! if (nprtrw > nspecresmin ) then -! call abor1('transform_test:nprtrw (approx square value) > nspecresmin') -! endif -! exit -! endif -! enddo -! endif -!endif -! -!! Create communicators for mpi groups -!if (.not.lmpoff) then -! call mpl_groups_create(nprtrw, nprtrv) -!endif -! -!if (lmpoff) then -! mysetw = (myproc - 1)/nprtrv + 1 -! mysetv = mod(myproc - 1, nprtrv) + 1 -!else -! call mpl_cart_coords(myproc, mysetw, mysetv) -! -! ! Just checking for now... -! iprtrv = mod(myproc - 1, nprtrv) + 1 -! iprtrw = (myproc - 1)/nprtrv + 1 -! if (iprtrv /= mysetv .or. iprtrw /= mysetw) then -! call abor1('transform_test:inconsistency when computing mysetw and mysetv') -! endif -!endif -! -!if (.not. lmpoff) then -! call mpl_buffer_method(kmp_type=mp_type, kmbx_size=mbx_size, kprocids=nprcids, ldinfo=(verbosity>=1)) -!endif -! -!! Determine number of local levels for fourier and legendre calculations -!! based on the values of nflevg and nprtrv -!allocate(numll(nprtrv+1)) -! -!! Calculate remainder -!iprused = min(nflevg+1, nprtrv) -!ilevpp = nflevg/nprtrv -!irest = nflevg -ilevpp*nprtrv -!do jroc = 1, nprtrv -! if (jroc <= irest) then -! numll(jroc) = ilevpp+1 -! else -! numll(jroc) = ilevpp -! endif -!enddo -!numll(iprused+1:nprtrv+1) = 0 -! -!nflevl = numll(mysetv) -! -!ivsetsc(1) = iprused -!ifld = 0 -! -!!=================================================================================================== -!! Setup gstats -!!=================================================================================================== -! -!if (lstats) then -! call gstats_setup(nproc, myproc, nprcids, & -! & lstats, lstatscpu, lsyncstats, ldetailed_stats, lbarrier_stats, lbarrier_stats2, & -! & lstats_omp, lstats_comms, lstats_mem, nstats_mem, lstats_alloc, & -! & ltrace_stats, ntrace_stats, nprnt_stats, lxml_stats) -! call gstats_psut -! -! ! Assign labels to GSTATS regions -! call gstats_labels -!endif -! !=================================================================================================== ! ecTrans setup routines !=================================================================================================== @@ -715,7 +515,9 @@ FUNCTION ectrans_print_norms_fails(nout,ncheck) result (ierr) ierr = 1 endif END FUNCTION ectrans_print_norms_fails + SUBROUTINE ectrans_norms_reduce(ztloop) +use mpl_module USE transform_driver_data_mod, ONLY: ztstepmax , ztstepmin , ztstepavg USE transform_driver_data_mod, ONLY: ztstepmax1, ztstepmin1, ztstepavg1 USE transform_driver_data_mod, ONLY: ztstepmax2, ztstepmin2, ztstepavg2 @@ -737,6 +539,7 @@ SUBROUTINE ectrans_norms_reduce(ztloop) call mpl_allreduce(ztstepmax2, 'max', ldreprod=.false.) call mpl_allreduce(ztstepmin2, 'min', ldreprod=.false.) END SUBROUTINE ectrans_norms_reduce + SUBROUTINE ectrans_compute_time_stats(nproc,iters) USE transform_driver_data_mod, ONLY : ztstep, ztstep1, ztstep2 USE transform_driver_data_mod, ONLY : ztstepavg, ztstepavg1, ztstepavg2 @@ -798,193 +601,6 @@ SUBROUTINE ectrans_print_time_stats(nout,ztloop,nproc) write(nout,'(" ")') END SUBROUTINE ectrans_print_time_stats -!=================================================================================================== - -!=================================================================================================== - -!subroutine parse_grid(cgrid,ndgl,nloen) -! -! character(len=*) :: cgrid -! integer, intent(inout) :: ndgl -! integer, intent(inout), allocatable :: nloen(:) -! integer :: ios -! integer :: gaussian_number -! read(cgrid(2:len_trim(cgrid)),*,IOSTAT=ios) gaussian_number -! if (ios==0) then -! ndgl = 2 * gaussian_number -! allocate(nloen(ndgl)) -! if (cgrid(1:1) == 'F') then ! Regular Gaussian grid -! nloen(:) = gaussian_number * 4 -! return -! endif -! if (cgrid(1:1) == 'O') then ! Octahedral Gaussian grid -! do i = 1, ndgl / 2 -! nloen(i) = 20 + 4 * (i - 1) -! nloen(ndgl - i + 1) = nloen(i) -! end do -! return -! endif -! endif -! call parsing_failed("ERROR: Unsupported grid specified: "// trim(cgrid)) -! -!end subroutine -! -!!=================================================================================================== -! -!function get_int_value(cname, iarg) result(value) -! -! integer :: value -! character(len=*), intent(in) :: cname -! integer, intent(inout) :: iarg -! character(len=128) :: carg -! integer :: stat -! -! carg = get_str_value(cname, iarg) -! call str2int(carg, value, stat) -! -! if (stat /= 0) then -! call parsing_failed("Invalid argument for " // trim(cname) // ": " // trim(carg)) -! end if -! -!end function -! -!!=================================================================================================== -! -!function get_str_value(cname, iarg) result(value) -! -! character(len=128) :: value -! character(len=*), intent(in) :: cname -! integer, intent(inout) :: iarg -! -! iarg = iarg + 1 -! call get_command_argument(iarg, value) -! -! if (value == "") then -! call parsing_failed("Invalid argument for " // trim(cname) // ": no value provided") -! end if -! -!end function -! -!!=================================================================================================== -! -!subroutine parsing_failed(message) -! -! character(len=*), intent(in) :: message -! if (luse_mpi) call mpl_init(ldinfo=.false.) -! if (ec_mpirank() == 0) then -! write(nerr,"(a)") trim(message) -! call print_help(unit=nerr) -! endif -! if (luse_mpi) call mpl_end(ldmeminfo=.false.) -! stop -! -!end subroutine -! -!!=================================================================================================== -! -!subroutine get_command_line_arguments(nsmax, cgrid, iters, nfld, nlev, lvordiv, lscders, luvders, & -! & luseflt, nproma, verbosity, ldump_values, lprint_norms, & -! & lmeminfo, nprtrv, nprtrw, ncheck) -! -! integer, intent(inout) :: nsmax ! Spectral truncation -! character(len=16), intent(inout) :: cgrid ! Spectral truncation -! integer, intent(inout) :: iters ! Number of iterations for transform test -! integer, intent(inout) :: nfld ! Number of scalar fields -! integer, intent(inout) :: nlev ! Number of vertical levels -! logical, intent(inout) :: lvordiv ! Also transform vorticity/divergence -! logical, intent(inout) :: lscders ! Compute scalar derivatives -! logical, intent(inout) :: luvders ! Compute uv East-West derivatives -! logical, intent(inout) :: luseflt ! Use fast Legendre transforms -! integer, intent(inout) :: nproma ! NPROMA -! integer, intent(inout) :: verbosity ! Level of verbosity -! logical, intent(inout) :: ldump_values ! Dump values of grid point fields for debugging -! logical, intent(inout) :: lprint_norms ! Calculate and print spectral norms of fields -! logical, intent(inout) :: lmeminfo ! Show information from FIAT ec_meminfo routine at the -! ! end -! integer, intent(inout) :: nprtrv ! Size of V set (spectral decomposition) -! integer, intent(inout) :: nprtrw ! Size of W set (spectral decomposition) -! integer, intent(inout) :: ncheck ! The multiplier of the machine epsilon used as a -! ! tolerance for correctness checking -! -! character(len=128) :: carg ! Storage variable for command line arguments -! integer :: iarg = 1 ! Argument index -! integer :: stat ! For storing success status of string->integer conversion -! integer :: myproc -! -! do while (iarg <= command_argument_count()) -! call get_command_argument(iarg, carg) -! -! select case(carg) -! ! Parse help argument -! case('-h', '--help') -! if (luse_mpi) call mpl_init(ldinfo=.false.) -! if (ec_mpirank()==0) call print_help() -! if (luse_mpi) call mpl_end(ldmeminfo=.false.) -! stop -! ! Parse verbosity argument -! case('-v') -! verbosity = 1 -! ! Parse number of iterations argument -! case('-n', '--niter') -! iters = get_int_value('-n', iarg) -! if (iters < 1) then -! call parsing_failed("Invalid argument for -n: must be > 0") -! end if -! ! Parse spectral truncation argument -! case('-t', '--truncation') -! nsmax = get_int_value('-t', iarg) -! if (nsmax < 1) then -! call parsing_failed("Invalid argument for -t: must be > 0") -! end if -! case('-g', '--grid'); cgrid = get_str_value('-g', iarg) -! case('-f', '--nfld'); nfld = get_int_value('-f', iarg) -! case('-l', '--nlev'); nlev = get_int_value('-l', iarg) -! case('--vordiv'); lvordiv = .True. -! case('--scders'); lscders = .True. -! case('--uvders'); luvders = .True. -! case('--flt'); luseflt = .True. -! case('--nproma'); nproma = get_int_value('--nproma', iarg) -! case('--dump-values'); ldump_values = .true. -! case('--norms'); lprint_norms = .true. -! case('--meminfo'); lmeminfo = .true. -! case('--nprtrv'); nprtrv = get_int_value('--nprtrv', iarg) -! case('--nprtrw'); nprtrw = get_int_value('--nprtrw', iarg) -! case('-c', '--check'); ncheck = get_int_value('-c', iarg) -! case default -! call parsing_failed("Unrecognised argument: " // trim(carg)) -! -! end select -! iarg = iarg + 1 -! end do -! -! if (.not. lvordiv) then -! luvders = .false. -! endif -! -!end subroutine get_command_line_arguments -! -!!=================================================================================================== -! -!function cubic_octahedral_gaussian_grid(nsmax) result(cgrid) -! -! character(len=16) :: cgrid -! integer, intent(in) :: nsmax -! write(cgrid,'(a,i0)') 'O',nsmax+1 -! -!end function -! -!!=================================================================================================== -! -!subroutine str2int(str, int, stat) -! -! character(len=*), intent(in) :: str -! integer, intent(out) :: int -! integer, intent(out) :: stat -! read(str, *, iostat=stat) int -! -!end subroutine str2int -! -!=================================================================================================== subroutine sort(a, n) @@ -1007,78 +623,6 @@ subroutine sort(a, n) end do end subroutine sort - -!=================================================================================================== -! -!subroutine print_help(unit) -! -! integer, optional :: unit -! integer :: nout = 6 -! if (present(unit)) then -! nout = unit -! endif -! -! write(nout, "(a)") "" -! -! if (jprb == jprd) then -! write(nout, "(a)") "NAME ectrans-benchmark-dp" -! else -! write(nout, "(a)") "NAME ectrans-benchmark-sp" -! end if -! write(nout, "(a)") "" -! -! write(nout, "(a)") "DESCRIPTION" -! write(nout, "(a)") " This program tests ecTrans by transforming fields back and forth& -! & between spectral " -! if (jprb == jprd) then -! write(nout, "(a)") " space and grid-point space (double-precision version)" -! else -! write(nout, "(a)") " space and grid-point space (single-precision version)" -! end if -! write(nout, "(a)") "" -! -! write(nout, "(a)") "USAGE" -! if (jprb == jprd) then -! write(nout, "(a)") " ectrans-benchmark-dp [options]" -! else -! write(nout, "(a)") " ectrans-benchmark-sp [options]" -! end if -! write(nout, "(a)") "" -! -! write(nout, "(a)") "OPTIONS" -! write(nout, "(a)") " -h, --help Print this message" -! write(nout, "(a)") " -v Run with verbose output" -! write(nout, "(a)") " -t, --truncation T Run with this triangular spectral truncation& -! & (default = 79)" -! write(nout, "(a)") " -g, --grid GRID Run with this grid. Possible values: O, F" -! write(nout, "(a)") " If not specified, O is used with N=truncation+1& -! & (cubic relation)" -! write(nout, "(a)") " -n, --niter NITER Run for this many inverse/direct transform& -! & iterations (default = 10)" -! write(nout, "(a)") " -f, --nfld NFLD Number of scalar fields (default = 1)" -! write(nout, "(a)") " -l, --nlev NLEV Number of vertical levels (default = 1)" -! write(nout, "(a)") " --vordiv Also transform vorticity-divergence to wind" -! write(nout, "(a)") " --scders Compute scalar derivatives (default off)" -! write(nout, "(a)") " --uvders Compute uv East-West derivatives (default off). Only& -! & when also --vordiv is given" -! write(nout, "(a)") " --flt Run with fast Legendre transforms (default off)" -! write(nout, "(a)") " --nproma NPROMA Run with NPROMA (default no blocking: NPROMA=ngptot)" -! write(nout, "(a)") " --norms Calculate and print spectral norms of transformed& -! & fields" -! write(nout, "(a)") " The computation of spectral norms will skew overall& -! & timings" -! write(nout, "(a)") " --meminfo Show diagnostic information from FIAT's ec_meminfo& -! & subroutine on memory usage, thread-binding etc." -! write(nout, "(a)") " --nprtrv Size of V set in spectral decomposition" -! write(nout, "(a)") " --nprtrw Size of W set in spectral decomposition" -! write(nout, "(a)") " -c, --check VALUE The multiplier of the machine epsilon used as a& -! & tolerance for correctness checking" -! write(nout, "(a)") "" -! write(nout, "(a)") "DEBUGGING" -! write(nout, "(a)") " --dump-values Output gridpoint fields in unformatted binary file" -! write(nout, "(a)") "" -! -!end subroutine print_help ! !=================================================================================================== @@ -1173,57 +717,6 @@ subroutine dump_gridpoint_field(jstep, myproc, nproma, ngpblks, fld, fldchar, no end subroutine dump_gridpoint_field -!=================================================================================================== -! -!function detect_mpirun() result(lmpi_required) -! logical :: lmpi_required -! integer :: ilen -! integer, parameter :: nvars = 5 -! character(len=32), dimension(nvars) :: cmpirun_detect -! character(len=4) :: clenv_dr_hook_assert_mpi_initialized -! integer :: ivar -! -! ! Environment variables that are set when mpirun, srun, aprun, ... are used -! cmpirun_detect(1) = 'OMPI_COMM_WORLD_SIZE' ! openmpi -! cmpirun_detect(2) = 'ALPS_APP_PE' ! cray pe -! cmpirun_detect(3) = 'PMI_SIZE' ! intel -! cmpirun_detect(4) = 'SLURM_NTASKS' ! slurm -! cmpirun_detect(5) = 'ECTRANS_USE_MPI' ! forced -! -! lmpi_required = .false. -! do ivar = 1, nvars -! call get_environment_variable(name=trim(cmpirun_detect(ivar)), length=ilen) -! if (ilen > 0) then -! lmpi_required = .true. -! exit ! break -! endif -! enddo -!end function -! -!!=================================================================================================== -! -!! Assign GSTATS labels to the main regions of ecTrans -!subroutine gstats_labels -! -! call gstats_label(0, ' ', 'PROGRAM - Total') -! call gstats_label(1, ' ', 'SETUP_TRANS0 - Setup ecTrans') -! call gstats_label(2, ' ', 'SETUP_TRANS - Setup ecTrans handle') -! call gstats_label(3, ' ', 'TIME STEP - Time step') -! call gstats_label(4, ' ', 'INV_TRANS - Inverse transform') -! call gstats_label(5, ' ', 'DIR_TRANS - Direct transform') -! call gstats_label(6, ' ', 'NORMS - Norm comp. (optional)') -! call gstats_label(102, ' ', 'LTINV_CTL - Inv. Legendre transform') -! call gstats_label(103, ' ', 'LTDIR_CTL - Dir. Legendre transform') -! call gstats_label(106, ' ', 'FTDIR_CTL - Dir. Fourier transform') -! call gstats_label(107, ' ', 'FTINV_CTL - Inv. Fourier transform') -! call gstats_label(140, ' ', 'SULEG - Comp. of Leg. poly.') -! call gstats_label(152, ' ', 'LTINV_CTL - M to L transposition') -! call gstats_label(153, ' ', 'LTDIR_CTL - L to M transposition') -! call gstats_label(157, ' ', 'FTINV_CTL - L to G transposition') -! call gstats_label(158, ' ', 'FTDIR_CTL - G to L transposition') -! call gstats_label(400, ' ', 'GSTATS - GSTATS itself') -! -!end subroutine gstats_labels end module transform_driver diff --git a/src/programs/ectrans-benchmark-program.F90 b/src/programs/ectrans-benchmark-program.F90 index 0378fe89..a5a53050 100644 --- a/src/programs/ectrans-benchmark-program.F90 +++ b/src/programs/ectrans-benchmark-program.F90 @@ -40,7 +40,7 @@ program transform_test ! Sam Hatfield ! -use parkind1, only: jpim, jprb, jprd +use parkind1, only: jpim, jprd use oml_mod ,only : oml_max_threads use mpl_module use yomgstats, only: jpmaxstat @@ -59,12 +59,7 @@ program transform_test implicit none -!! Number of points in top/bottom latitudes -!integer(kind=jpim), parameter :: min_octa_points = 20 -! integer(kind=jpim) :: istack, getstackusage -!real(kind=jprd), dimension(1) :: zmaxerr(5), zerr(5) -!real(kind=jprd) :: zmaxerrg ! ! Output unit numbers integer(kind=jpim), parameter :: nerr = 0 ! Unit number for STDERR @@ -95,32 +90,9 @@ program transform_test integer(kind=jpim) :: myproc, jj integer :: jstep ! -real(kind=jprd) :: ztinit, ztloop, timef !, ztstepmax, ztstepmin, ztstepavg, ztstepmed -!real(kind=jprd) :: ztstepmax1, ztstepmin1, ztstepavg1, ztstepmed1 -!real(kind=jprd) :: ztstepmax2, ztstepmin2, ztstepavg2, ztstepmed2 -!real(kind=jprd), allocatable :: ztstep(:), ztstep1(:), ztstep2(:) -!! -!!real(kind=jprb), allocatable :: znormsp(:), znormsp1(:), znormdiv(:), znormdiv1(:) -!!real(kind=jprb), allocatable :: znormvor(:), znormvor1(:), znormt(:), znormt1(:) +real(kind=jprd) :: ztinit, ztloop, timef real(kind=jprd) :: zaveave(0:jpmaxstat) -!! -!!! Grid-point space data structures -!!real(kind=jprb), allocatable, target :: zgmv (:,:,:,:) ! Multilevel fields at t and t-dt -!!real(kind=jprb), allocatable, target :: zgmvs (:,:,:) ! Single level fields at t and t-dt -!!real(kind=jprb), pointer :: zgp3a (:,:,:,:) ! Multilevel fields at t and t-dt -!!real(kind=jprb), pointer :: zgpuv (:,:,:,:) ! Multilevel fields at t and t-dt -!!real(kind=jprb), pointer :: zgp2 (:,:,:) ! Single level fields at t and t-dt -!! -!!! Spectral space data structures -!!real(kind=jprb), allocatable, target :: sp3d(:,:,:) -!!real(kind=jprb), pointer :: zspvor(:,:) => null() -!!real(kind=jprb), pointer :: zspdiv(:,:) => null() -!!real(kind=jprb), pointer :: zspsc3a(:,:,:) => null() -!!real(kind=jprb), allocatable :: zspsc2(:,:) -! logical :: lstack = .false. ! Output stack info -!logical :: luserpnm = .false. -!logical :: lkeeprpnm = .false. logical :: luseflt = .false. ! Use fast legendre transforms logical :: ltrace_stats = .false. logical :: lstats_omp = .false. @@ -135,11 +107,10 @@ program transform_test logical :: lstatscpu = .false. logical :: lstats_mem = .false. logical :: lxml_stats = .false. -!logical :: lfftw = .true. ! Use FFTW for Fourier transforms logical :: lvordiv = .false. logical :: lscders = .false. logical :: luvders = .false. -logical :: lprint_norms = .false. ! Calculate and print spectral norms +logical :: lprint_norms = .true. ! Calculate and print spectral norms logical :: lmeminfo = .false. ! Show information from FIAT routine ec_meminfo at the end ! integer(kind=jpim) :: nstats_mem = 0 @@ -155,9 +126,6 @@ program transform_test ! Verbosity level (0 or 1) integer :: verbosity = 0 ! -!real(kind=jprd) :: zra = 6371229._jprd -! -!integer(kind=jpim) :: npromatr = 0 ! nproma for trans lib !integer(kind=jpim) :: ncombflen = 1800000 ! Size of comm buffer ! integer(kind=jpim) :: nproc ! Number of procs @@ -179,9 +147,6 @@ program transform_test ! ! sumpini integer(kind=jpim) :: isqr -!logical :: lsync_trans = .true. ! Activate barrier sync -!logical :: leq_regions = .true. ! Eq regions flag -! ! integer(kind=jpim) :: nproma = 0 integer(kind=jpim) :: ngpblks @@ -190,10 +155,6 @@ program transform_test integer(kind=jpim) :: iprtrw integer(kind=jpim) :: iprused, ilevpp, irest, ilev, jlev ! -!integer(kind=jpim) :: ndimgmv = 0 ! Third dim. of gmv "(nproma,nflevg,ndimgmv,ngpblks)" -!integer(kind=jpim) :: ndimgmvs = 0 ! Second dim. gmvs "(nproma,ndimgmvs,ngpblks)" -!! -! logical :: ldump_values = .false. ! integer, external :: ec_mpirank @@ -360,7 +321,6 @@ program transform_test nflevl = numll(mysetv) ivsetsc(1) = iprused -!ifld = 0 !=================================================================================================== ! Setup gstats @@ -386,21 +346,13 @@ program transform_test call gstats(1, 0) call ectrans_setup0(kout=nout, kerr=nerr, kverbosity=verbosity, kprgpns=nprgpns, kprgpew=nprgpew, & & kprtrw=nprtrw, lduse_mpi=.not.luse_mpi) -!call setup_trans0(kout=nout, kerr=nerr, kprintlev=merge(2, 0, verbosity == 1), & -! & kmax_resol=nmax_resol, kpromatr=npromatr, kprgpns=nprgpns, kprgpew=nprgpew, & -! & kprtrw=nprtrw, kcombflen=ncombflen, ldsync_trans=lsync_trans, & -! & ldeq_regions=leq_regions, prad=zra, ldalloperm=.true., ldmpoff=.not.luse_mpi) call gstats(1, 1) ! call gstats(2, 0) call ectrans_setup(ksmax=nsmax, kdgl=ndgl, kloen=nloen, lduseflt=luseflt) -!call setup_trans(ksmax=nsmax, kdgl=ndgl, kloen=nloen, ldsplit=.true., & -! & ldusefftw=lfftw, lduserpnm=luserpnm, ldkeeprpnm=lkeeprpnm, & -! & lduseflt=luseflt) call gstats(2, 1) ! call ectrans_trans_inq(kspec2=nspec2, kspec2g=nspec2g, kgptot=ngptot, kgptotg=ngptotg) -!call trans_inq(kspec2=nspec2, kspec2g=nspec2g, kgptot=ngptot, kgptotg=ngptotg) if (nproma == 0) then ! no blocking (default when not specified) @@ -424,21 +376,6 @@ program transform_test !=================================================================================================== call ectrans_allocate_spectral(nflevl,nspec2,nfld,nsmax) -!! Allocate spectral arrays -!! Try to mimick IFS layout as much as possible -!nullify(zspvor) -!nullify(zspdiv) -!nullify(zspsc3a) -!allocate(sp3d(nflevl,nspec2,2+nfld)) -!allocate(zspsc2(1,nspec2)) -! -!call initialize_spectral_arrays(nsmax, zspsc2, sp3d) -! -!! Point convenience variables to storage variable sp3d -!zspvor => sp3d(:,:,1) -!zspdiv => sp3d(:,:,2) -!zspsc3a => sp3d(:,:,3:3+(nfld-1)) -! !!=================================================================================================== !! Allocate gridpoint arrays !!=================================================================================================== @@ -455,13 +392,7 @@ program transform_test ! ! call ectrans_allocate_grid(nproma, ngpblks, nfld, nflevg, lvordiv, luvders, lscders) -!allocate(zgmv(nproma,nflevg,ndimgmv,ngpblks)) -!allocate(zgmvs(nproma,ndimgmvs,ngpblks)) -! -!zgpuv => zgmv(:,:,1:jend_vder_EW,:) -!zgp3a => zgmv(:,:,jbegin_sc:jend_scder_EW,:) -!zgp2 => zgmvs(:,:,:) -! + !!=================================================================================================== !! Allocate norm arrays !!=================================================================================================== @@ -469,34 +400,8 @@ program transform_test if (lprint_norms .or. ncheck > 0) then call ectrans_allocate_normdata(nflevl=nflevl, nflevg=nflevg) call ectrans_calculate_norms(indx=1, nflevl=nflevl, nflevg=nflevg, ivset=ivset,ivsetsc=ivsetsc) -! allocate(znormsp(1)) -! allocate(znormsp1(1)) -! allocate(znormvor(nflevg)) -! allocate(znormvor1(nflevg)) -! allocate(znormdiv(nflevg)) -! allocate(znormdiv1(nflevg)) -! allocate(znormt(nflevg)) -! allocate(znormt1(nflevg)) -! -! call specnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor1, kvset=ivset(1:nflevg)) -! call specnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv1, kvset=ivset(1:nflevg)) -! call specnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormt1, kvset=ivset(1:nflevg)) -! call specnorm(pspec=zspsc2(1:1,:), pnorm=znormsp1, kvset=ivsetsc) -! if (verbosity >= 1) then call ectrans_print_norms_init(nout,nflevg) -! do ifld = 1, nflevg -! write(nout,'("norm zspvor( ",i4,",:) = ",f20.15)') ifld, znormvor1(ifld) -! enddo -! do ifld = 1, nflevg -! write(nout,'("norm zspdiv( ",i4,",:) = ",f20.15)') ifld, znormdiv1(ifld) -! enddo -! do ifld = 1, nflevg -! write(nout,'("norm zspsc3a(",i4,",:,1) = ",f20.15)') ifld, znormt1(ifld) -! enddo -! do ifld = 1, 1 -! write(nout,'("norm zspsc2( ",i4,",:) = ",f20.15)') ifld, znormsp1(ifld) -! enddo endif endif ! @@ -514,21 +419,6 @@ program transform_test endif if (iters <= 0) call abor1('transform_test:iters <= 0') -! -!allocate(ztstep(iters)) -!allocate(ztstep1(iters)) -!allocate(ztstep2(iters)) -! -!ztstepavg = 0._jprd -!ztstepmax = 0._jprd -!ztstepmin = 9999999999999999._jprd -!ztstepavg1 = 0._jprd -!ztstepmax1 = 0._jprd -!ztstepmin1 = 9999999999999999._jprd -!ztstepavg2 = 0._jprd -!ztstepmax2 = 0._jprd -!ztstepmin2 = 9999999999999999._jprd -! call ectrans_allocate_timers(iters) write(nout,'(a)') '======= Start of spectral transforms =======' @@ -553,48 +443,20 @@ program transform_test call gstats(4,0) if (lvordiv) then call ectrans_inv_trans(nproma=nproma,lscders=lscders,luvders=luvders,ivset=ivset,ivsetsc=ivsetsc) -! call inv_trans(kresol=1, kproma=nproma, & -! & pspsc2=zspsc2, & ! spectral surface pressure -! & pspvor=zspvor, & ! spectral vorticity -! & pspdiv=zspdiv, & ! spectral divergence -! & pspsc3a=zspsc3a, & ! spectral scalars -! & ldscders=lscders, & -! & ldvorgp=.false., & ! no gridpoint vorticity -! & lddivgp=.false., & ! no gridpoint divergence -! & lduvder=luvders, & -! & kvsetuv=ivset, & -! & kvsetsc2=ivsetsc, & -! & kvsetsc3a=ivset, & -! & pgp2=zgp2, & -! & pgpuv=zgpuv, & -! & pgp3a=zgp3a) else call ectrans_inv_trans(nproma=nproma,lscders=lscders,ivset=ivset,ivsetsc=ivsetsc) -! call inv_trans(kresol=1, kproma=nproma, & -! & pspsc2=zspsc2, & ! spectral surface pressure -! & pspsc3a=zspsc3a, & ! spectral scalars -! & ldscders=lscders, & ! scalar derivatives -! & kvsetsc2=ivsetsc, & -! & kvsetsc3a=ivset, & -! & pgp2=zgp2, & -! & pgp3a=zgp3a) endif call gstats(4,1) ! call ectrans_set_ztstep_end(indx=1,jstep=jstep) -! ztstep1(jstep) = (timef() - ztstep1(jstep))/1000.0_jprd ! ! !================================================================================================= ! ! While in grid point space, dump the values to disk, for debugging only ! !================================================================================================= ! if (ldump_values) then - call ectrans_dump(jstep, myproc, nproma, ngpblks, nflevg, noutdump) ! ! dump a field to a binary file -! call dump_gridpoint_field(jstep, myproc, nproma, ngpblks, zgp2(:,1,:), 'S', noutdump) -! call dump_gridpoint_field(jstep, myproc, nproma, ngpblks, zgpuv(:,nflevg,1,:), 'U', noutdump) -! call dump_gridpoint_field(jstep, myproc, nproma, ngpblks, zgpuv(:,nflevg,2,:), 'V', noutdump) -! call dump_gridpoint_field(jstep, myproc, nproma, ngpblks, zgp3a(:,nflevg,1,:), 'T', noutdump) + call ectrans_dump(jstep, myproc, nproma, ngpblks, nflevg, noutdump) endif !================================================================================================= @@ -602,54 +464,20 @@ program transform_test !================================================================================================= call ectrans_set_ztstep_start(indx=2,jstep=jstep) -! ztstep2(jstep) = timef() call gstats(5,0) if (lvordiv) then call ectrans_direct_trans(nproma,nfld,ivset,ivsetsc,lvordiv) -! call dir_trans(kresol=1, kproma=nproma, & -! & pgp2=zgmvs(:,1:1,:), & -! & pgpuv=zgpuv(:,:,1:2,:), & -! & pgp3a=zgp3a(:,:,1:nfld,:), & -! & pspvor=zspvor, & -! & pspdiv=zspdiv, & -! & pspsc2=zspsc2, & -! & pspsc3a=zspsc3a, & -! & kvsetuv=ivset, & -! & kvsetsc2=ivsetsc, & -! & kvsetsc3a=ivset) else call ectrans_direct_trans(nproma,nfld,ivset,ivsetsc) -! call dir_trans(kresol=1, kproma=nproma, & -! & pgp2=zgmvs(:,1:1,:), & -! & pgp3a=zgp3a(:,:,1:nfld,:), & -! & pspsc2=zspsc2, & -! & pspsc3a=zspsc3a, & -! & kvsetsc2=ivsetsc, & -! & kvsetsc3a=ivset) endif call gstats(5,1) -! ztstep2(jstep) = (timef() - ztstep2(jstep))/1000.0_jprd call ectrans_set_ztstep_end(indx=2,jstep=jstep) !================================================================================================= ! Calculate timings !================================================================================================= call ectrans_calculate_timings(jstep) - -! ztstep(jstep) = (timef() - ztstep(jstep))/1000.0_jprd -! -! ztstepavg = ztstepavg + ztstep(jstep) -! ztstepmin = min(ztstep(jstep), ztstepmin) -! ztstepmax = max(ztstep(jstep), ztstepmax) -! -! ztstepavg1 = ztstepavg1 + ztstep1(jstep) -! ztstepmin1 = min(ztstep1(jstep), ztstepmin1) -! ztstepmax1 = max(ztstep1(jstep), ztstepmax1) -! -! ztstepavg2 = ztstepavg2 + ztstep2(jstep) -! ztstepmin2 = min(ztstep2(jstep), ztstepmin2) -! ztstepmax2 = max(ztstep2(jstep), ztstepmax2) !================================================================================================= ! Print norms @@ -658,40 +486,7 @@ program transform_test if (lprint_norms) then call gstats(6,0) call ectrans_calculate_norms(indx=0, nflevl=nflevl, nflevg=nflevg, ivset=ivset,ivsetsc=ivsetsc) -! call specnorm(pspec=zspsc2(1:1,:), pnorm=znormsp, kvset=ivsetsc(1:1)) -! call specnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor, kvset=ivset(1:nflevg)) -! call specnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv, kvset=ivset(1:nflevg)) -! call specnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormt, kvset=ivset(1:nflevg)) -! call ectrans_print_norms_calc(nout, jstep, myproc,nflevg) -! ! Surface pressure -! if (myproc == 1) then -! zmaxerr(:) = -999.0 -! do ifld = 1, 1 -! write(nout,*) "znormsp", znormsp -! call flush(nout) -! zerr(1) = abs(znormsp1(ifld)/znormsp(ifld) - 1.0_jprd) -! zmaxerr(1) = max(zmaxerr(1), zerr(1)) -! enddo -! ! Divergence -! do ifld = 1, nflevg -! zerr(2) = abs(znormdiv1(ifld)/znormdiv(ifld) - 1.0_jprd) -! zmaxerr(2) = max(zmaxerr(2), zerr(2)) -! enddo -! ! Vorticity -! do ifld = 1, nflevg -! zerr(3) = abs(znormvor1(ifld)/znormvor(ifld) - 1.0_jprd) -! zmaxerr(3) = max(zmaxerr(3),zerr(3)) -! enddo -! ! Temperature -! do ifld = 1, nflevg -! zerr(4) = abs(znormt1(ifld)/znormt(ifld) - 1.0_jprd) -! zmaxerr(4) = max(zmaxerr(4), zerr(4)) -! enddo -! write(nout,'("time step ",i6," took", f8.4," | zspvor max err="e10.3,& -! & " | zspdiv max err="e10.3," | zspsc3a max err="e10.3," | zspsc2 max err="e10.3)') & -! & jstep, ztstep(jstep), zmaxerr(3), zmaxerr(2), zmaxerr(4), zmaxerr(1) -! endif call gstats(6,1) else call ectrans_print_timestep(nout,jstep) @@ -710,54 +505,8 @@ program transform_test ! if (lprint_norms .or. ncheck > 0) then call ectrans_calculate_norms(indx=2, nflevl=nflevl, nflevg=nflevg, ivset=ivset,ivsetsc=ivsetsc) -! call specnorm(pspec=zspvor(1:nflevl,:), pnorm=znormvor, kvset=ivset) -! call specnorm(pspec=zspdiv(1:nflevl,:), pnorm=znormdiv, kvset=ivset) -! call specnorm(pspec=zspsc3a(1:nflevl,:,1), pnorm=znormt, kvset=ivset) -! call specnorm(pspec=zspsc2(1:1,:), pnorm=znormsp, kvset=ivsetsc) -! if (myproc == 1) then call ectrans_print_norms_fin(nout,nflevg,myproc) -! zmaxerr(:) = -999.0 -! do ifld = 1, nflevg -! zerr(3) = abs(real(znormvor1(ifld),kind=jprd)/real(znormvor(ifld),kind=jprd) - 1.0_jprd) -! zmaxerr(3) = max(zmaxerr(3), zerr(3)) -! if (verbosity >= 1) then -! write(nout,'("norm zspvor( ",i4,") = ",f20.15," error = ",e10.3)') ifld, znormvor1(ifld), zerr(3) -! endif -! enddo -! do ifld = 1, nflevg -! zerr(2) = abs(real(znormdiv1(ifld),kind=jprd)/real(znormdiv(ifld),kind=jprd) - 1.0d0) -! zmaxerr(2) = max(zmaxerr(2),zerr(2)) -! if (verbosity >= 1) then -! write(nout,'("norm zspdiv( ",i4,",:) = ",f20.15," error = ",e10.3)') ifld, znormdiv1(ifld), zerr(2) -! endif -! enddo -! do ifld = 1, nflevg -! zerr(4) = abs(real(znormt1(ifld),kind=jprd)/real(znormt(ifld),kind=jprd) - 1.0d0) -! zmaxerr(4) = max(zmaxerr(4), zerr(4)) -! if (verbosity >= 1) then -! write(nout,'("norm zspsc3a(",i4,",:,1) = ",f20.15," error = ",e10.3)') ifld, znormt1(ifld), zerr(4) -! endif -! enddo -! do ifld = 1, 1 -! zerr(1) = abs(real(znormsp1(ifld),kind=jprd)/real(znormsp(ifld),kind=jprd) - 1.0d0) -! zmaxerr(1) = max(zmaxerr(1), zerr(1)) -! if (verbosity >= 1) then -! write(nout,'("norm zspsc2( ",i4,",:) = ",f20.15," error = ",e10.3)') ifld, znormsp1(ifld), zerr(1) -! endif -! enddo -! -! ! maximum error across all fields -! zmaxerrg = max(max(zmaxerr(1),zmaxerr(2)), max(zmaxerr(2), zmaxerr(3))) -! -! if (verbosity >= 1) write(nout,*) -! write(nout,'("max error zspvor(1:nlev,:) = ",e10.3)') zmaxerr(3) -! write(nout,'("max error zspdiv(1:nlev,:) = ",e10.3)') zmaxerr(2) -! write(nout,'("max error zspsc3a(1:nlev,:,1) = ",e10.3)') zmaxerr(4) -! write(nout,'("max error zspsc2(1:1,:) = ",e10.3)') zmaxerr(1) -! write(nout,*) -! write(nout,'("max error combined = = ",e10.3)') zmaxerrg -! write(nout,*) endif if (ncheck > 0) then ierr = 0 @@ -765,14 +514,6 @@ program transform_test ! ! If the maximum spectral norm error across all fields is greater than 100 times the machine ! ! epsilon, fail the test ierr=ectrans_print_norms_fails(nout,ncheck) -! if (zmaxerrg > real(ncheck, jprd) * epsilon(1.0_jprd)) then -! write(nout, '(a)') '*******************************' -! write(nout, '(a)') 'Correctness test failed' -! write(nout, '(a,1e7.2)') 'Maximum spectral norm error = ', zmaxerrg -! write(nout, '(a,1e7.2)') 'Error tolerance = ', real(ncheck, jprd) * epsilon(1.0_jprd) -! write(nout, '(a)') '*******************************' -! ierr = 1 -! endif endif ! ! Root rank broadcasts the correctness checker result to the other ranks @@ -789,70 +530,9 @@ program transform_test ! if (luse_mpi) then call ectrans_norms_reduce(ztloop) -! call mpl_allreduce(ztloop, 'sum', ldreprod=.false.) -! call mpl_allreduce(ztstep, 'sum', ldreprod=.false.) -! call mpl_allreduce(ztstepavg, 'sum', ldreprod=.false.) -! call mpl_allreduce(ztstepmax, 'max', ldreprod=.false.) -! call mpl_allreduce(ztstepmin, 'min', ldreprod=.false.) -! -! call mpl_allreduce(ztstep1, 'sum', ldreprod=.false.) -! call mpl_allreduce(ztstepavg1, 'sum', ldreprod=.false.) -! call mpl_allreduce(ztstepmax1, 'max', ldreprod=.false.) -! call mpl_allreduce(ztstepmin1, 'min', ldreprod=.false.) -! -! call mpl_allreduce(ztstep2, 'sum', ldreprod=.false.) -! call mpl_allreduce(ztstepavg2, 'sum', ldreprod=.false.) -! call mpl_allreduce(ztstepmax2, 'max', ldreprod=.false.) -! call mpl_allreduce(ztstepmin2, 'min', ldreprod=.false.) endif call ectrans_compute_time_stats(nproc,iters) -! -!ztstepavg = (ztstepavg/real(nproc,jprd))/real(iters,jprd) -!ztloop = ztloop/real(nproc,jprd) -!ztstep(:) = ztstep(:)/real(nproc,jprd) -! -!call sort(ztstep,iters) -!ztstepmed = ztstep(iters/2) -! -!ztstepavg1 = (ztstepavg1/real(nproc,jprd))/real(iters,jprd) -!ztstep1(:) = ztstep1(:)/real(nproc,jprd) -! -!call sort(ztstep1, iters) -!ztstepmed1 = ztstep1(iters/2) -! -!ztstepavg2 = (ztstepavg2/real(nproc,jprd))/real(iters,jprd) -!ztstep2(:) = ztstep2(:)/real(nproc,jprd) -! -!call sort(ztstep2,iters) -!ztstepmed2 = ztstep2(iters/2) call ectrans_print_time_stats(nout,ztloop,nproc) -! -!write(nout,'(a)') '======= Start of time step stats =======' -!write(nout,'(" ")') -!write(nout,'("Inverse transforms")') -!write(nout,'("------------------")') -!write(nout,'("avg (s): ",f8.4)') ztstepavg1 -!write(nout,'("min (s): ",f8.4)') ztstepmin1 -!write(nout,'("max (s): ",f8.4)') ztstepmax1 -!write(nout,'("med (s): ",f8.4)') ztstepmed1 -!write(nout,'(" ")') -!write(nout,'("Direct transforms")') -!write(nout,'("-----------------")') -!write(nout,'("avg (s): ",f8.4)') ztstepavg2 -!write(nout,'("min (s): ",f8.4)') ztstepmin2 -!write(nout,'("max (s): ",f8.4)') ztstepmax2 -!write(nout,'("med (s): ",f8.4)') ztstepmed2 -!write(nout,'(" ")') -!write(nout,'("Inverse-direct transforms")') -!write(nout,'("-------------------------")') -!write(nout,'("avg (s): ",f8.4)') ztstepavg -!write(nout,'("min (s): ",f8.4)') ztstepmin -!write(nout,'("max (s): ",f8.4)') ztstepmax -!write(nout,'("med (s): ",f8.4)') ztstepmed -!write(nout,'("loop (s): ",f8.4)') ztloop -!write(nout,'(" ")') -!write(nout,'(a)') '======= End of time step stats =======' -!write(nout,'(" ")') ! if (lstack) then ! Gather stack usage statistics @@ -1126,32 +806,6 @@ subroutine compute_grid_extents() enddo end subroutine compute_grid_extents -!!=================================================================================================== -! -!subroutine sort(a, n) -! -! real(kind=jprd), intent(inout) :: a(n) -! integer(kind=jpim), intent(in) :: n -! -! real(kind=jprd) :: x -! -! integer :: i, j -! -! do i = 2, n -! x = a(i) -! j = i - 1 -! do while (j >= 1) -! if (a(j) <= x) exit -! a(j + 1) = a(j) -! j = j - 1 -! end do -! a(j + 1) = x -! end do -! -!end subroutine sort -! -!=================================================================================================== - subroutine print_help(unit) integer, optional :: unit @@ -1162,30 +816,30 @@ subroutine print_help(unit) write(nout, "(a)") "" - if (jprb == jprd) then - write(nout, "(a)") "NAME ectrans-benchmark-dp" - else - write(nout, "(a)") "NAME ectrans-benchmark-sp" - end if - write(nout, "(a)") "" - - write(nout, "(a)") "DESCRIPTION" - write(nout, "(a)") " This program tests ecTrans by transforming fields back and forth& - & between spectral " - if (jprb == jprd) then - write(nout, "(a)") " space and grid-point space (double-precision version)" - else - write(nout, "(a)") " space and grid-point space (single-precision version)" - end if - write(nout, "(a)") "" - - write(nout, "(a)") "USAGE" - if (jprb == jprd) then - write(nout, "(a)") " ectrans-benchmark-dp [options]" - else - write(nout, "(a)") " ectrans-benchmark-sp [options]" - end if - write(nout, "(a)") "" +! if (jprb == jprd) then +! write(nout, "(a)") "NAME ectrans-benchmark-dp" +! else +! write(nout, "(a)") "NAME ectrans-benchmark-sp" +! end if +! write(nout, "(a)") "" + +! write(nout, "(a)") "DESCRIPTION" +! write(nout, "(a)") " This program tests ecTrans by transforming fields back and forth& +! & between spectral " +! if (jprb == jprd) then +! write(nout, "(a)") " space and grid-point space (double-precision version)" +! else +! write(nout, "(a)") " space and grid-point space (single-precision version)" +! end if +! write(nout, "(a)") "" + +! write(nout, "(a)") "USAGE" +! if (jprb == jprd) then +! write(nout, "(a)") " ectrans-benchmark-dp [options]" +! else +! write(nout, "(a)") " ectrans-benchmark-sp [options]" +! end if +! write(nout, "(a)") "" write(nout, "(a)") "OPTIONS" write(nout, "(a)") " -h, --help Print this message" @@ -1222,99 +876,6 @@ & subroutine on memory usage, thread-binding etc." end subroutine print_help -!!=================================================================================================== -! -!subroutine initialize_spectral_arrays(nsmax, zsp, sp3d) -! -! integer, intent(in) :: nsmax ! Spectral truncation -! real(kind=jprb), intent(inout) :: zsp(:,:) ! Surface pressure -! real(kind=jprb), intent(inout) :: sp3d(:,:,:) ! 3D fields -! -! integer(kind=jpim) :: nflevl -! integer(kind=jpim) :: nfield -! -! integer :: i, j -! -! nflevl = size(sp3d, 1) -! nfield = size(sp3d, 3) -! -! ! First initialize surface pressure -! call initialize_2d_spectral_field(nsmax, zsp(1,:)) -! -! ! Then initialize all of the 3D fields -! do i = 1, nflevl -! do j = 1, nfield -! call initialize_2d_spectral_field(nsmax, sp3d(i,:,j)) -! end do -! end do -! -!end subroutine initialize_spectral_arrays -! -!!=================================================================================================== -! -!subroutine initialize_2d_spectral_field(nsmax, field) -! -! integer, intent(in) :: nsmax ! Spectral truncation -! real(kind=jprb), intent(inout) :: field(:) ! Field to initialize -! -! integer :: i, index, num_my_zon_wns -! integer, allocatable :: my_zon_wns(:), nasm0(:) -! -! ! Choose a spherical harmonic to initialize arrays -! integer :: m_num = 4 ! Zonal wavenumber -! integer :: l_num = 19 ! Total wavenumber -! -! ! First initialise all spectral coefficients to zero -! field(:) = 0.0 -! -! ! Get zonal wavenumbers this rank is responsible for -! call trans_inq(knump=num_my_zon_wns) -! allocate(my_zon_wns(num_my_zon_wns)) -! call trans_inq(kmyms=my_zon_wns) -! -! ! If rank is responsible for the chosen zonal wavenumber... -! if (any(my_zon_wns == m_num) ) then -! ! Get array of spectral array addresses (this maps (m, n=m) to array index) -! allocate(nasm0(0:nsmax)) -! call trans_inq(kasm0=nasm0) -! -! ! Find out local array index of chosen spherical harmonic -! index = nasm0(m_num) + 2 * (l_num - m_num) + 1 -! -! ! Set just that element to a constant value -! field(index) = 1.0 -! else -! return -! end if -! -!end subroutine initialize_2d_spectral_field -! -!!=================================================================================================== -! -!subroutine dump_gridpoint_field(jstep, myproc, nproma, ngpblks, fld, fldchar, noutdump) -! -! ! Dump a 2d field to a binary file. -! -! integer(kind=jpim), intent(in) :: jstep ! Time step, used for naming file -! integer(kind=jpim), intent(in) :: myproc ! MPI rank, used for naming file -! integer(kind=jpim), intent(in) :: nproma ! Size of nproma -! integer(kind=jpim), intent(in) :: ngpblks ! Number of nproma blocks -! real(kind=jprb) , intent(in) :: fld(nproma,ngpblks) ! 2D field -! character , intent(in) :: fldchar ! Single character field identifier -! integer(kind=jpim), intent(in) :: noutdump ! Tnit number for output file -! -! character(len=14) :: filename = "x.xxx.xxxx.dat" -! -! write(filename(1:1),'(a1)') fldchar -! write(filename(3:5),'(i3.3)') jstep -! write(filename(7:10),'(i4.4)') myproc -! -! open(noutdump, file=filename, form="unformatted") -! write(noutdump) reshape(fld, (/ nproma*ngpblks /)) -! close(noutdump) -! -!end subroutine dump_gridpoint_field - !=================================================================================================== function detect_mpirun() result(lmpi_required) diff --git a/src/programs/ectrans-benchmark.F90 b/src/programs/ectrans-benchmark.F90 index 8a2fb698..1d660478 100644 --- a/src/programs/ectrans-benchmark.F90 +++ b/src/programs/ectrans-benchmark.F90 @@ -128,7 +128,7 @@ program transform_test logical :: lvordiv = .false. logical :: lscders = .false. logical :: luvders = .false. -logical :: lprint_norms = .false. ! Calculate and print spectral norms +logical :: lprint_norms = .true. ! Calculate and print spectral norms logical :: lmeminfo = .false. ! Show information from FIAT routine ec_meminfo at the end integer(kind=jpim) :: nstats_mem = 0