Skip to content

Commit

Permalink
move transp_R2Phi into communications.f90
Browse files Browse the repository at this point in the history
also define transp_Phi2R
  • Loading branch information
tgastine committed Jan 14, 2025
1 parent d0606a5 commit f28a7c0
Show file tree
Hide file tree
Showing 2 changed files with 174 additions and 96 deletions.
183 changes: 168 additions & 15 deletions src/communications.f90
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ module communications
use constants, only: zero
use precision_mod
use mem_alloc, only: memWrite, bytes_allocated
use parallel_mod, only: rank, n_procs, ierr
use parallel_mod, only: rank, n_procs, ierr, load
use truncation, only: l_max, lm_max, minc, n_r_max, n_r_ic_max, &
& fd_order, fd_order_bound, m_max, m_min
use blocking, only: st_map, lo_map, lm_balance, llm, ulm
use radial_data, only: nRstart, nRstop, radial_balance
use logic, only: l_mag, l_conv, l_heat, l_chemical_conv, l_finite_diff, &
& l_mag_kin, l_double_curl, l_save_out, l_packed_transp, &
& l_parallel_solve, l_mag_par_solve, l_phase_field
& l_parallel_solve, l_mag_par_solve
use useful, only: abortRun
use output_data, only: n_log_file, log_file
use iso_fortran_env, only: output_unit
Expand All @@ -41,7 +41,7 @@ module communications
end type gather_type

public :: gather_from_lo_to_rank0,scatter_from_rank0_to_lo, allgather_from_Rloc, &
& gather_all_from_lo_to_rank0, gather_from_Rloc
& gather_all_from_lo_to_rank0, gather_from_Rloc, transp_R2Phi, transp_Phi2R
public :: get_global_sum, finalize_communications, initialize_communications

#ifdef WITH_MPI
Expand Down Expand Up @@ -213,10 +213,10 @@ subroutine initialize_communications
allocate( type_mpiatoap :: lo2r_press )
end if

call lo2r_one%create_comm(1)
call r2lo_one%create_comm(1)
if ( l_packed_transp ) then
call lo2r_one%create_comm(1)
if ( l_finite_diff .and. fd_order==2 .and. fd_order_bound==2 ) then
call r2lo_one%create_comm(1)
if ( l_parallel_solve ) then
if ( l_mag .and. (.not. l_mag_par_solve) ) then
call lo2r_flow%create_comm(2)
Expand All @@ -232,7 +232,6 @@ subroutine initialize_communications
end if
end if
else
if ( l_phase_field ) call r2lo_one%create_comm(1)
if ( l_heat ) then
call lo2r_s%create_comm(2)
call r2lo_s%create_comm(2)
Expand All @@ -255,9 +254,6 @@ subroutine initialize_communications
call r2lo_field%create_comm(3)
end if
end if
else
call lo2r_one%create_comm(1)
call r2lo_one%create_comm(1)
end if

! allocate a temporary array for the gather operations.
Expand All @@ -278,16 +274,15 @@ subroutine finalize_communications
call destroy_gather_type(gt_OC)
call destroy_gather_type(gt_IC)

call lo2r_one%destroy_comm()
call r2lo_one%destroy_comm()
if ( l_packed_transp ) then
call lo2r_one%destroy_comm()
if ( l_finite_diff .and. fd_order==2 .and. fd_order_bound==2 ) then
call r2lo_one%destroy_comm()
if ( (.not. l_parallel_solve) .and. (.not. l_mag_par_solve) ) then
call lo2r_flow%destroy_comm()
call r2lo_flow%destroy_comm()
end if
else
if ( l_phase_field ) call r2lo_one%destroy_comm()
if ( l_heat ) then
call lo2r_s%destroy_comm()
call r2lo_s%destroy_comm()
Expand All @@ -306,9 +301,6 @@ subroutine finalize_communications
call r2lo_field%destroy_comm()
end if
end if
else
call lo2r_one%destroy_comm()
call r2lo_one%destroy_comm()
end if

deallocate( temp_gather_lo )
Expand Down Expand Up @@ -931,6 +923,167 @@ subroutine reduce_scalar(scal_dist, scal_glob, irank)
#endif

end subroutine reduce_scalar
!-------------------------------------------------------------------------------
subroutine transp_R2Phi(arr_Rloc, arr_Ploc, phi_balance, nPstart, nPstop)
!
! This subroutine is used to compute a MPI transpose between a R-distributed
! array and a Phi-distributed array
!

!-- Input fields
integer, intent(in) :: nPstart ! First index for phi-distributed arrays
integer, intent(in) :: nPstop ! Last index for phi-distributed arrays
type(load), intent(in) :: phi_balance(0:) ! Balancing info along phi
real(cp), intent(in) :: arr_Rloc(:,:,nRstart:) ! Input array (R-distributed)

!-- Output array
real(cp), intent(out) :: arr_Ploc(:,nPstart:,:) ! Output array (Phi-distributed)

#ifdef WITH_MPI
!-- Local variables
integer :: n_r, n_t, n_p
integer :: rcounts(0:n_procs-1), scounts(0:n_procs-1)
integer :: rdisp(0:n_procs-1), sdisp(0:n_procs-1)
real(cp), allocatable :: sbuff(:), rbuff(:)
integer :: p, ii, n_theta, n_phi

n_theta = size(arr_Rloc, 1)
n_phi = size(arr_Rloc, 2)

!-- Set displacements vectors and buffer sizes
do p=0,n_procs-1
scounts(p)=(nRstop-nRstart+1)*phi_balance(p)%n_per_rank*n_theta
rcounts(p)=radial_balance(p)%n_per_rank*(nPStop-nPStart+1)*n_theta
end do

rdisp(0)=0
sdisp(0)=0
do p=1,n_procs-1
sdisp(p)=sdisp(p-1)+scounts(p-1)
rdisp(p)=rdisp(p-1)+rcounts(p-1)
end do
allocate( sbuff(sum(scounts)), rbuff(sum(rcounts)) )
sbuff(:)=0.0_cp
rbuff(:)=0.0_cp

!-- Prepare buffer
do p=0,n_procs-1
ii=sdisp(p)+1
do n_r=nRstart,nRstop
do n_p=phi_balance(p)%nStart,phi_balance(p)%nStop
do n_t=1,n_theta
sbuff(ii)=arr_Rloc(n_t,n_p,n_r)
ii=ii+1
end do
end do
end do
end do

!-- All to all
call MPI_Alltoallv(sbuff, scounts, sdisp, MPI_DEF_REAL, &
& rbuff, rcounts, rdisp, MPI_DEF_REAL, &
& MPI_COMM_WORLD, ierr)

!-- Reassemble array
do p=0,n_procs-1
ii=rdisp(p)+1
do n_r=radial_balance(p)%nStart,radial_balance(p)%nStop
do n_p=nPstart,nPstop
do n_t=1,n_theta
arr_Ploc(n_t,n_p,n_r)=rbuff(ii)
ii=ii+1
end do
end do
end do
end do

!-- Clear memory from temporary arrays
deallocate( rbuff, sbuff )
#else
arr_Ploc(:,:,:)=arr_Rloc(:,:,:)
#endif

end subroutine transp_R2Phi
!-------------------------------------------------------------------------------
subroutine transp_Phi2R(arr_Ploc, arr_Rloc, phi_balance, nPstart, nPstop)
!
! This subroutine is used to compute a MPI transpose between a Phi-distributed
! array and a R-distributed array
!

!-- Input array
integer, intent(in) :: nPstart ! First index for phi-distributed arrays
integer, intent(in) :: nPstop ! Last index for phi-distributed arrays
type(load), intent(in) :: phi_balance(0:) ! Balancing info along phi
real(cp), intent(in) :: arr_Ploc(:,nPstart:,:) ! Input array (Phi-distributed)

!-- Output array
real(cp), intent(out) :: arr_Rloc(:,:,nRstart:) ! Output array (R-distributed)

#ifdef WITH_MPI
!-- Local variables
integer :: n_r, n_t, n_p
integer :: rcounts(0:n_procs-1), scounts(0:n_procs-1)
integer :: rdisp(0:n_procs-1), sdisp(0:n_procs-1)
real(cp), allocatable :: sbuff(:), rbuff(:)
integer :: p, ii, n_theta

n_theta = size(arr_Rloc, 1)

!-- Set displacements vectors and buffer sizes
do p=0,n_procs-1
scounts(p)=radial_balance(p)%n_per_rank*(nPstop-nPstart+1)*n_theta
rcounts(p)=(nRstop-nRstart+1)*phi_balance(p)%n_per_rank*n_theta
end do

rdisp(0)=0
sdisp(0)=0
do p=1,n_procs-1
sdisp(p)=sdisp(p-1)+scounts(p-1)
rdisp(p)=rdisp(p-1)+rcounts(p-1)
end do
allocate( sbuff(sum(scounts)), rbuff(sum(rcounts)) )
sbuff(:)=0.0_cp
rbuff(:)=0.0_cp

!-- Prepare buffer
do p=0,n_procs-1
ii=sdisp(p)+1
do n_r=radial_balance(p)%nStart,radial_balance(p)%nStop
do n_p=nPstart,nPstop
do n_t=1,n_theta
sbuff(ii)=arr_Ploc(n_t,n_p,n_r)
ii=ii+1
end do
end do
end do
end do

!-- All to all
call MPI_Alltoallv(sbuff, scounts, sdisp, MPI_DEF_REAL, &
& rbuff, rcounts, rdisp, MPI_DEF_REAL, &
& MPI_COMM_WORLD, ierr)

!-- Reassemble array
do p=0,n_procs-1
ii=rdisp(p)+1
do n_r=nRstart,nRstop
do n_p=phi_balance(p)%nStart,phi_balance(p)%nStop
do n_t=1,n_theta
arr_Rloc(n_t,n_p,n_r)=rbuff(ii)
ii=ii+1
end do
end do
end do
end do

!-- Clear memory from temporary arrays
deallocate( rbuff, sbuff )
#else
arr_Rloc(:,:,:)=arr_Ploc(:,:,:)
#endif

end subroutine transp_Phi2R
!-------------------------------------------------------------------------------
subroutine find_faster_block(idx_type)

Expand Down
87 changes: 6 additions & 81 deletions src/outMisc.f90
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ module outMisc_mod
use parallel_mod
use precision_mod
use mem_alloc, only: bytes_allocated
use communications, only: gather_from_Rloc, gather_from_lo_to_rank0
use communications, only: gather_from_Rloc, gather_from_lo_to_rank0, &
& transp_R2Phi
use truncation, only: n_r_max, n_theta_max, n_r_maxMag, n_phi_max, lm_max, &
& m_min, m_max, minc
use radial_data, only: n_r_icb, n_r_cmb, nRstart, nRstop, radial_balance
Expand Down Expand Up @@ -732,9 +733,10 @@ subroutine outPhase(time, timePassed, timeNorm, l_stop_time, nLogs, s, ds, phi)
#endif

!-- MPI transpose
call transp_R2Phi(temp_Rloc, temp_Ploc)
if ( l_dtphaseMovie ) call transp_R2Phi(dtemp_Rloc, dtemp_Ploc)
call transp_R2Phi(phase_Rloc, phase_Ploc)
call transp_R2Phi(temp_Rloc, temp_Ploc, phi_balance, nPstart, nPstop)
if ( l_dtphaseMovie ) call transp_R2Phi(dtemp_Rloc, dtemp_Ploc, phi_balance, &
& nPstart, nPstop)
call transp_R2Phi(phase_Rloc, phase_Ploc, phi_balance, nPstart, nPstop)

rmelt_axi_loc(:)=0.0_cp
rmelt_mean_loc=0.0_cp
Expand Down Expand Up @@ -1278,83 +1280,6 @@ subroutine calc_melt_frame()
end do

end subroutine calc_melt_frame
!------------------------------------------------------------------------------------
subroutine transp_R2Phi(arr_Rloc, arr_Ploc)
!
! This subroutine is used to compute a MPI transpose between a R-distributed
! array and a Phi-distributed array
!

!-- Input array
real(cp), intent(in) :: arr_Rloc(n_theta_max,n_phi_max,nRstart:nRstop)

!-- Output array
real(cp), intent(out) :: arr_Ploc(n_theta_max,nPstart:nPstop,n_r_max)

!-- Local variables
integer :: n_r, n_t, n_p
#ifdef WITH_MPI
integer, allocatable :: rcounts(:), scounts(:), rdisp(:), sdisp(:)
real(cp), allocatable :: sbuff(:), rbuff(:)
integer :: p, ii, my_phi_counts

!-- Set displacements vectors and buffer sizes
allocate( rcounts(0:n_procs-1), scounts(0:n_procs-1) )
allocate( rdisp(0:n_procs-1), sdisp(0:n_procs-1) )
do p=0,n_procs-1
my_phi_counts=phi_balance(p)%n_per_rank
scounts(p)=nR_per_rank*my_phi_counts*n_theta_max
rcounts(p)=radial_balance(p)%n_per_rank*(nPStop-nPStart+1)*n_theta_max
end do

rdisp(0)=0
sdisp(0)=0
do p=1,n_procs-1
sdisp(p)=sdisp(p-1)+scounts(p-1)
rdisp(p)=rdisp(p-1)+rcounts(p-1)
end do
allocate( sbuff(sum(scounts)), rbuff(sum(rcounts)) )
sbuff(:)=0.0_cp
rbuff(:)=0.0_cp

!-- Prepare buffer
do p=0,n_procs-1
ii=sdisp(p)+1
do n_r=nRstart,nRstop
do n_p=phi_balance(p)%nStart,phi_balance(p)%nStop
do n_t=1,n_theta_max
sbuff(ii)=arr_Rloc(n_t,n_p,n_r)
ii=ii+1
end do
end do
end do
end do

!-- All to all
call MPI_Alltoallv(sbuff, scounts, sdisp, MPI_DEF_REAL, &
& rbuff, rcounts, rdisp, MPI_DEF_REAL, &
& MPI_COMM_WORLD, ierr)

!-- Reassemble array
do p=0,n_procs-1
ii=rdisp(p)+1
do n_r=radial_balance(p)%nStart,radial_balance(p)%nStop
do n_p=nPstart,nPstop
do n_t=1,n_theta_max
arr_Ploc(n_t,n_p,n_r)=rbuff(ii)
ii=ii+1
end do
end do
end do
end do

!-- Clear memory from temporary arrays
deallocate( rcounts, scounts, rdisp, sdisp, rbuff, sbuff )
#else
arr_Ploc(:,:,:)=arr_Rloc(:,:,:)
#endif

end subroutine transp_R2Phi
!------------------------------------------------------------------------------------
subroutine gather_Ploc(arr_Ploc, arr_full)
!
Expand Down

0 comments on commit f28a7c0

Please sign in to comment.