Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Try to increase cache usage in derive / interp operators #22

Draft
wants to merge 17 commits into
base: hack_DC_TGV
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ else ifeq ($(CMP),nvhpc)
FC = mpif90
FFLAGS += -cpp -O3 -march=native
FFLAGS += -Minfo=accel -stdpar -acc -target=multicore
# FFLAGS = -cpp -Mfree -Kieee -Minfo=accel -g -acc -target=gpu -fast -O3 -Minstrument
FFLAGS = -cpp -Mfree -Kieee -Minfo=accel,ftn,inline,loop,vect,opt,stdpar -stdpar=gpu -gpu=cc70,managed,lineinfo,deepcopy -acc -target=gpu -traceback -O3 -Minstrument -g
LFLAGS += -acc -lnvhpcwrapnvtx
endif

Expand All @@ -70,9 +70,9 @@ SRC = $(SRCDIR)/x3d_precision.f90 $(SRCDIR)/module_param.f90 $(SRCDIR)/time_inte
ifeq ($(FFT),fftw3)
#FFTW3_PATH=/usr
#FFTW3_PATH=/usr/lib64
FFTW3_PATH=/usr/local/Cellar/fftw/3.3.7_1
FFTW3_PATH=/opt/software/builder/developers/libraries/fftw/3.3.8/1/gcc-10.2.0-openmpi-4.0.5
INC=-I$(FFTW3_PATH)/include
LIBFFT=-L$(FFTW3_PATH) -lfftw3 -lfftw3f
LIBFFT=-L$(FFTW3_PATH)/lib -lfftw3 -lfftw3f
else ifeq ($(FFT),fftw3_f03)
FFTW3_PATH=/usr #ubuntu # apt install libfftw3-dev
#FFTW3_PATH=/usr/lib64 #fedora # dnf install fftw fftw-devel
Expand Down
33 changes: 31 additions & 2 deletions src/thomas.f90
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ module thomas
module procedure zthomas_12
end interface zthomas

interface thomas1d
module procedure thomas1d_0
module procedure thomas1d_12
end interface thomas1d


contains

Expand Down Expand Up @@ -181,7 +186,9 @@ end subroutine zthomas_12
! fw, in, used during the backward step
! nn, in, size of the vector
!
pure subroutine thomas1d(tt, ff, fs, fw, nn)
pure subroutine thomas1d_12(tt, ff, fs, fw, nn)

!$acc routine seq

implicit none

Expand All @@ -199,6 +206,28 @@ pure subroutine thomas1d(tt, ff, fs, fw, nn)
tt(k) = (tt(k)-ff(k)*tt(k+1)) * fw(k)
enddo

end subroutine thomas1d
end subroutine thomas1d_12

!
! Run thomas1d_12 on tt, then subtract a scaled copy of perio from the result
!
! tt, inout, vector handed to thomas1d_12 and then corrected in place
! ff, in, forwarded to thomas1d_12
! fs, in, forwarded to thomas1d_12
! fw, in, forwarded to thomas1d_12
! perio, in, vector scaled by the correction coefficient and subtracted from tt
! alfa, in, scalar weighting the last entries (tt(nn), perio(nn)) in the coefficient
! nn, in, size of the vectors
!
! NOTE(review): this looks like the periodic closure of the tridiagonal
! solve (Sherman-Morrison-type correction) - confirm against the callers.
!
pure subroutine thomas1d_0(tt, ff, fs, fw, perio, alfa, nn)

  !$acc routine seq

  implicit none

  ! Arguments
  integer, intent(in) :: nn
  real(mytype), dimension(nn), intent(inout) :: tt
  real(mytype), dimension(nn), intent(in) :: ff, fs, fw, perio
  real(mytype), intent(in) :: alfa

  ! Local variables
  integer :: i
  real(mytype) :: numer, denom, coef

  ! First pass: plain tridiagonal sweep, result left in tt
  call thomas1d_12(tt, ff, fs, fw, nn)

  ! Correction coefficient built from the first and last entries
  numer = tt(1) - alfa*tt(nn)
  denom = one + perio(1) - alfa*perio(nn)
  coef = numer / denom

  ! Apply the correction to every entry
  do i = 1, nn
    tt(i) = tt(i) - coef*perio(i)
  end do

end subroutine thomas1d_0

end module thomas
6 changes: 6 additions & 0 deletions src/transeq.f90
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,17 @@ subroutine momentum_rhs_eq(dux1,duy1,duz1,ux1,uy1,uz1)
enddo

call derx (td1,ta1,x3d_op_derxp,xsize(1),xsize(2),xsize(3))
print *, "td1", minval(td1), maxval(td1)
call derx (te1,tb1,x3d_op_derx, xsize(1),xsize(2),xsize(3))
print *, "te1", minval(te1), maxval(te1)
call derx (tf1,tc1,x3d_op_derx, xsize(1),xsize(2),xsize(3))
print *, "tf1", minval(tf1), maxval(tf1)
call derx (ta1,ux1,x3d_op_derx, xsize(1),xsize(2),xsize(3))
print *, "ta1", minval(ta1), maxval(ta1)
call derx (tb1,uy1,x3d_op_derxp,xsize(1),xsize(2),xsize(3))
print *, "tb1", minval(tb1), maxval(tb1)
call derx (tc1,uz1,x3d_op_derxp,xsize(1),xsize(2),xsize(3))
print *, "tc1", minval(tc1), maxval(tc1)

! Convective terms of x-pencil are stored in tg1,th1,ti1
do concurrent (k=1:xsize(3), j=1:xsize(2), i=1:xsize(1))
Expand Down
Loading