-
Notifications
You must be signed in to change notification settings - Fork 6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Reduce memory use in transeq for both backends #130
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -233,30 +233,30 @@ subroutine transeq_halo_exchange(self, u, v, w, dir) | |
|
||
end subroutine transeq_halo_exchange | ||
|
||
subroutine transeq_dist_component(self, rhs, u, conv, & | ||
subroutine transeq_dist_component(self, rhs_du, u, conv, & | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same, add a comment explaining that rhs_du is where the output is stored. |
||
u_recv_s, u_recv_e, & | ||
conv_recv_s, conv_recv_e, & | ||
tdsops_du, tdsops_dud, tdsops_d2u, dir) | ||
!! Computes RHS_x^u following: | ||
!! | ||
!! rhs_x^u = -0.5*(conv*du/dx + d(u*conv)/dx) + nu*d2u/dx2 | ||
!! Computes RHS_x^u following: | ||
!! | ||
!! rhs_x^u = -0.5*(conv*du/dx + d(u*conv)/dx) + nu*d2u/dx2 | ||
class(omp_backend_t) :: self | ||
class(field_t), intent(inout) :: rhs | ||
!> The result field, it is also used as temporary storage | ||
class(field_t), intent(inout) :: rhs_du | ||
class(field_t), intent(in) :: u, conv | ||
real(dp), dimension(:, :, :), intent(in) :: u_recv_s, u_recv_e, & | ||
conv_recv_s, conv_recv_e | ||
class(tdsops_t), intent(in) :: tdsops_du | ||
class(tdsops_t), intent(in) :: tdsops_dud | ||
class(tdsops_t), intent(in) :: tdsops_d2u | ||
integer, intent(in) :: dir | ||
class(field_t), pointer :: du, d2u, dud | ||
class(field_t), pointer :: d2u, dud | ||
|
||
du => self%allocator%get_block(dir, VERT) | ||
dud => self%allocator%get_block(dir, VERT) | ||
d2u => self%allocator%get_block(dir, VERT) | ||
|
||
call exec_dist_transeq_compact( & | ||
rhs%data, du%data, dud%data, d2u%data, & | ||
rhs_du%data, dud%data, d2u%data, & | ||
self%du_send_s, self%du_send_e, self%du_recv_s, self%du_recv_e, & | ||
self%dud_send_s, self%dud_send_e, self%dud_recv_s, self%dud_recv_e, & | ||
self%d2u_send_s, self%d2u_send_e, self%d2u_recv_s, self%d2u_recv_e, & | ||
|
@@ -266,7 +266,6 @@ subroutine transeq_dist_component(self, rhs, u, conv, & | |
self%mesh%par%nproc_dir(dir), self%mesh%par%pprev(dir), & | ||
self%mesh%par%pnext(dir), self%mesh%get_n_groups(dir)) | ||
|
||
call self%allocator%release_block(du) | ||
call self%allocator%release_block(dud) | ||
call self%allocator%release_block(d2u) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -61,7 +61,7 @@ subroutine exec_dist_tds_compact( & | |
end subroutine exec_dist_tds_compact | ||
|
||
subroutine exec_dist_transeq_compact( & | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe add a comment explaining that rhs_du is where the results are also stored. |
||
rhs, du, dud, d2u, & | ||
rhs_du, dud, d2u, & | ||
du_send_s, du_send_e, du_recv_s, du_recv_e, & | ||
dud_send_s, dud_send_e, dud_recv_s, dud_recv_e, & | ||
d2u_send_s, d2u_send_e, d2u_recv_s, d2u_recv_e, & | ||
|
@@ -71,8 +71,10 @@ subroutine exec_dist_transeq_compact( & | |
|
||
implicit none | ||
|
||
! du = d(u) | ||
real(dp), dimension(:, :, :), intent(out) :: rhs, du, dud, d2u | ||
!> The result array, it is also used as temporary storage | ||
real(dp), dimension(:, :, :), intent(out) :: rhs_du | ||
!> Temporary storage arrays | ||
real(dp), dimension(:, :, :), intent(out) :: dud, d2u | ||
|
||
! The ones below are intent(out) just so that we can write data in them, | ||
! not because we actually need the data they store later where this | ||
|
@@ -109,7 +111,7 @@ subroutine exec_dist_transeq_compact( & | |
!$omp parallel do private(ud, ud_recv_e, ud_recv_s) | ||
do k = 1, n_groups | ||
call der_univ_dist( & | ||
du(:, :, k), du_send_s(:, :, k), du_send_e(:, :, k), u(:, :, k), & | ||
rhs_du(:, :, k), du_send_s(:, :, k), du_send_e(:, :, k), u(:, :, k), & | ||
u_recv_s(:, :, k), u_recv_e(:, :, k), & | ||
tdsops_du%coeffs_s, tdsops_du%coeffs_e, tdsops_du%coeffs, & | ||
n, tdsops_du%dist_fw, tdsops_du%dist_bw, tdsops_du%dist_af & | ||
|
@@ -162,7 +164,7 @@ subroutine exec_dist_transeq_compact( & | |
|
||
!$omp parallel do | ||
do k = 1, n_groups | ||
call der_univ_subs(du(:, :, k), & | ||
call der_univ_subs(rhs_du(:, :, k), & | ||
du_recv_s(:, :, k), du_recv_e(:, :, k), & | ||
n, tdsops_du%dist_sa, tdsops_du%dist_sc) | ||
|
||
|
@@ -177,8 +179,9 @@ subroutine exec_dist_transeq_compact( & | |
do j = 1, n | ||
!$omp simd | ||
do i = 1, SZ | ||
rhs(i, j, k) = -0.5_dp*(v(i, j, k)*du(i, j, k) + dud(i, j, k)) & | ||
+ nu*d2u(i, j, k) | ||
rhs_du(i, j, k) = -0.5_dp*(v(i, j, k)*rhs_du(i, j, k) & | ||
+ dud(i, j, k)) & | ||
+ nu*d2u(i, j, k) | ||
end do | ||
!$omp end simd | ||
end do | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add a comment explaining that r_du is where the output is