Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Switch to gets with no separate halo data structure #168

Draft
wants to merge 15 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions app/main.f90
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ program main
associate(input => input_t())
output = output_t(input, matcha(input))
block
double precision, allocatable :: simulated_distribution(:,:)
double precision, allocatable :: simulated_distribution(:,:), frequency_distribution(:)
integer, parameter :: freq=2
integer num_cells

num_cells = output%my_num_cells()
simulated_distribution = output%simulated_distribution()
simulated_distribution(:,freq) = num_cells*simulated_distribution(:,freq)
call co_sum(simulated_distribution(:,freq), result_image=1)
frequency_distribution = num_cells*simulated_distribution(:,freq) ! copy to work around nagfor bug
call co_sum(frequency_distribution, result_image=1)
call co_sum(num_cells, result_image=1)
if (this_image()==1) simulated_distribution(:,freq) = simulated_distribution(:,freq)/dble(num_cells)
end block
Expand Down
12 changes: 7 additions & 5 deletions example/heat-equation.f90
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,14 @@ module subroutine exchange_halo(self)

end interface

real, allocatable :: halo_x(:,:)[:]

end module

submodule(subdomain_2D_m) subdomain_2D_s
use assertions_m, only : assert
implicit none

real, allocatable :: halo_x(:,:)[:]
real dx_, dy_
integer, parameter :: west=1, east=2
integer my_nx, nx, ny, me, num_subdomains, my_internal_west, my_internal_east
Expand Down Expand Up @@ -114,7 +115,8 @@ module subroutine exchange_halo(self)
self%s_(my_nx, 2:ny-1) = merge(boundary_val, internal_val, me==num_subdomains) ! east subdomain boundary

if (allocated(halo_x)) deallocate(halo_x)
allocate(halo_x(west:east, ny)[*])
!allocate(halo_x(west:east, ny)[*])
allocate(halo_x(2, ny)[*])
call self%exchange_halo
end procedure

Expand All @@ -131,7 +133,7 @@ module subroutine exchange_halo(self)
integer i, j
real, allocatable :: halo_west(:), halo_east(:)

call assert(allocated(rhs%s_), "subdomain_2D_t%laplacian: allocated(rhs%s_)")
!call assert(allocated(rhs%s_), "subdomain_2D_t%laplacian: allocated(rhs%s_)")
call assert(allocated(halo_x), "subdomain_2D_t%laplacian: allocated(halo_x)")

allocate(laplacian_rhs(my_nx, ny))
Expand Down Expand Up @@ -168,8 +170,8 @@ module subroutine exchange_halo(self)
end procedure

module procedure exchange_halo
if (me>1) halo_x(east,:)[me-1] = self%s_(1,:)
if (me<num_subdomains) halo_x(west,:)[me+1] = self%s_(my_nx,:)
!if (me>1) halo_x(east,:)[me-1] = self%s_(1,:)
!if (me<num_subdomains) halo_x(west,:)[me+1] = self%s_(my_nx,:)
end procedure

module procedure values
Expand Down
26 changes: 16 additions & 10 deletions example/time-paradigm.f90
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
! Terms of use are as specified in LICENSE.txt
program time_paradigm_m
!! Time various alternative programming paradigms
use subdomain_m, only : subdomain_t
use subdomain_m, only : subdomain_t, march
use assert_m, only : assert
use sourcery_m, only : string_t, file_t, command_line_t, bin_t, csv
use iso_fortran_env, only : int64
Expand All @@ -12,7 +12,7 @@ program time_paradigm_m
character(len=:), allocatable :: steps_string, resolution_string
type(command_line_t) command_line
integer(int64) counter_start, counter_end, clock_rate
integer :: steps=200, resolution=64
integer :: steps=300, resolution=128

associate(me => this_image())
if (command_line%argument_present(["--help"])) then
Expand Down Expand Up @@ -52,20 +52,22 @@ function functional_programming_time() result(system_time)
integer(int64) t_start_functional, t_end_functional, clock_rate
integer step
real system_time
type(subdomain_t) T
type(subdomain_t), save :: T[*]

call T%define(side=1., boundary_val=T_boundary, internal_val=T_internal_initial, n=resolution)

call system_clock(t_start_functional)

associate(dt => T%dx()*T%dy()/(4*alpha))
call system_clock(t_start_functional)

functional_programming: &
do step = 1, steps
sync all
T = T + dt * alpha * .laplacian. T
end do functional_programming

call system_clock(t_end_functional, clock_rate)
end associate

call system_clock(t_end_functional, clock_rate)
system_time = real(t_end_functional - t_start_functional)/real(clock_rate)

associate(L_infinity_norm => maxval(abs(T%values() - T_steady)))
Expand All @@ -78,18 +80,22 @@ function procedural_programming_time() result(system_time)
integer(int64) t_start_procedural, t_end_procedural, clock_rate
integer step
real system_time
type(subdomain_t) T
type(subdomain_t), save :: T[*]

call T%define(side=1., boundary_val=0., internal_val=1., n=resolution)

associate(dt => T%dx()*T%dy()/(4*alpha))
call T%define(side=1., boundary_val=0., internal_val=1., n=resolution)
call system_clock(t_start_procedural)

procedural_programming: &
do step = 1, steps
call T%step(alpha*dt)
sync all
call march(alpha*dt, T)
end do procedural_programming

call system_clock(t_end_procedural, clock_rate)
end associate

call system_clock(t_end_procedural, clock_rate)
system_time = real(t_end_procedural - t_start_procedural)/real(clock_rate)

associate(L_infinity_norm => maxval(abs(T%values() - T_steady)))
Expand Down
10 changes: 8 additions & 2 deletions src/matcha/distribution_s.F90 → src/matcha/distribution_s.f90
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,11 @@ pure function monotonically_increasing(f) result(monotonic)
"distribution_t%cumulative_distribution: allocated(cumulative_distribution_)")
call assert(allocated(self%vel_), "distribution_t%cumulative_distribution: allocated(vel_)")

! Sample from the distribution
call do_concurrent_sampled_speeds(speeds, self%vel_, self%cumulative_distribution(), sampled_speeds)
! Sample from the distribution
associate(ncells => size(speeds,1), nsteps => size(speeds,2))
allocate(sampled_speeds(ncells,nsteps))
call do_concurrent_sampled_speeds(speeds, self%vel_, self%cumulative_distribution(), sampled_speeds)
end associate

associate(nsteps => size(speeds,2))

Expand All @@ -63,6 +66,9 @@ pure function monotonically_increasing(f) result(monotonic)
end associate
end associate

if(allocated(my_velocities)) deallocate(my_velocities)
allocate(my_velocities, mold=dir)

call do_concurrent_my_velocities(nsteps, dir, sampled_speeds, my_velocities)

end associate
Expand Down
18 changes: 9 additions & 9 deletions src/matcha/do_concurrent_m.f90
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module do_concurrent_m
use iso_c_binding, only : c_double, c_int
use t_cell_collection_m, only : t_cell_collection_t, t_cell_collection_bind_C_t
use t_cell_collection_m, only : t_cell_collection_bind_C_t
implicit none
private
public :: do_concurrent_sampled_speeds, do_concurrent_my_velocities, do_concurrent_k, do_concurrent_speeds
Expand All @@ -11,34 +11,34 @@ module do_concurrent_m
pure module subroutine do_concurrent_sampled_speeds(speeds, vel, cumulative_distribution, sampled_speeds) bind(C)
implicit none
real(c_double), intent(in) :: speeds(:,:), vel(:), cumulative_distribution(:)
real(c_double), intent(out), allocatable :: sampled_speeds(:,:)
real(c_double), intent(out) :: sampled_speeds(:,:)
end subroutine

pure module subroutine do_concurrent_my_velocities(nsteps, dir, sampled_speeds, my_velocities) bind(C)
implicit none
integer(c_int), intent(in) :: nsteps
real(c_double), intent(in) :: dir(:,:,:), sampled_speeds(:,:)
real(c_double), intent(out), allocatable :: my_velocities(:,:,:)
real(c_double), intent(out) :: my_velocities(:,:,:)
end subroutine

pure module subroutine do_concurrent_k(speeds, vel, k) bind(C)
implicit none
real(c_double), intent(in) :: speeds(:), vel(:)
integer(c_int), intent(out), allocatable :: k(:)
integer(c_int), intent(out) :: k(:)
end subroutine

pure module subroutine &
do_concurrent_output_distribution(nintervals, speed, freq, emp_distribution, k, output_distribution) bind(C)
pure module subroutine do_concurrent_output_distribution(speed, freq, emp_distribution, k, output_distribution) &
bind(C)
implicit none
integer(c_int), intent(in) :: nintervals, speed, freq, k(:)
integer(c_int), intent(in) :: speed, freq, k(:)
real(c_double), intent(in) :: emp_distribution(:,:)
real(c_double), intent(out), allocatable :: output_distribution(:,:)
real(c_double), intent(out) :: output_distribution(:,:)
end subroutine

module subroutine do_concurrent_speeds(history, speeds) bind(C)
implicit none
type(t_cell_collection_bind_C_t), intent(in) :: history(:)
real(c_double), intent(out), allocatable :: speeds(:)
real(c_double), intent(out) :: speeds(:)
end subroutine

end interface
Expand Down
79 changes: 40 additions & 39 deletions src/matcha/do_concurrent_s.f90
Original file line number Diff line number Diff line change
@@ -1,68 +1,72 @@
submodule(do_concurrent_m) do_concurrent_s
use assert_m, only : assert
use iso_c_binding, only : c_f_pointer
implicit none

contains

module procedure do_concurrent_sampled_speeds

pure module subroutine do_concurrent_sampled_speeds(speeds, vel, cumulative_distribution, sampled_speeds) bind(C)
real(c_double), intent(in) :: speeds(:,:), vel(:), cumulative_distribution(:)
real(c_double), intent(out) :: sampled_speeds(:,:)
integer cell, step

call assert(all(shape(sampled_speeds)==shape(speeds)), "do_concurrent_sampled_speeds: {sampled_,}speeds shape match")

associate(ncells => size(speeds,1), nsteps => size(speeds,2))
allocate(sampled_speeds(ncells,nsteps))
do concurrent(cell = 1:ncells, step = 1:nsteps)
associate(k => findloc(speeds(cell,step) >= cumulative_distribution, value=.false., dim=1)-1)
sampled_speeds(cell,step) = vel(k)
end associate
end do
end associate

end procedure

module procedure do_concurrent_my_velocities

end subroutine

pure module subroutine do_concurrent_my_velocities(nsteps, dir, sampled_speeds, my_velocities) bind(C)
integer(c_int), intent(in) :: nsteps
real(c_double), intent(in) :: dir(:,:,:), sampled_speeds(:,:)
real(c_double), intent(out) :: my_velocities(:,:,:)
integer step

if(allocated(my_velocities)) deallocate(my_velocities)
allocate(my_velocities, mold=dir)

call assert(all([size(my_velocities,1),size(sampled_speeds,2)] == shape(sampled_speeds)), &
"do_concurrent_my_velocities: argument size match")
call assert(all(size(my_velocities,1)==shape(dir)), "do_concurrent_my_velocities: argument shape match")

do concurrent(step=1:nsteps)
my_velocities(:,step,1) = sampled_speeds(:,step)*dir(:,step,1)
my_velocities(:,step,2) = sampled_speeds(:,step)*dir(:,step,2)
my_velocities(:,step,3) = sampled_speeds(:,step)*dir(:,step,3)
end do

end procedure

module procedure do_concurrent_k
end subroutine

integer i
pure module subroutine do_concurrent_k(speeds, vel, k) bind(C)
real(c_double), intent(in) :: speeds(:), vel(:)
integer(c_int), intent(out) :: k(:)
integer i

associate(nspeeds => size(speeds))
if(allocated(k)) deallocate(k)
allocate(k(nspeeds))
do concurrent(i = 1:nspeeds)
k(i) = findloc(speeds(i) >= vel, value=.false., dim=1)-1
end do
end associate
end procedure

module procedure do_concurrent_output_distribution
end subroutine

pure module subroutine do_concurrent_output_distribution(speed, freq, emp_distribution, k, output_distribution) bind(C)
integer(c_int), intent(in) :: speed, freq, k(:)
real(c_double), intent(in) :: emp_distribution(:,:)
real(c_double), intent(out) :: output_distribution(:,:)
integer i

if(allocated(output_distribution)) deallocate(output_distribution)
allocate(output_distribution(nintervals,2))
output_distribution(:,freq) = 0.d0
output_distribution(:,speed) = emp_distribution(:,speed)
do concurrent(i = 1:size(output_distribution,1))
output_distribution(i,freq) = count(k==i)
end do

end procedure

module procedure do_concurrent_speeds
end subroutine

module subroutine do_concurrent_speeds(history, speeds) bind(C)
type(t_cell_collection_bind_C_t), intent(in) :: history(:)
real(c_double), intent(out) :: speeds(:)
integer i, j, k
integer, parameter :: nspacedims=3

Expand All @@ -78,19 +82,16 @@
x(i,:,:) = positions
end do

associate(t => history%time)
allocate(speeds(ncells*(npositions-1)))
do concurrent(i = 1:npositions-1, j = 1:ncells)
associate( &
u => (x(i+1,j,:) - x(i,j,:))/(t(i+1) - t(i)), &
ij => i + (j-1)*(npositions-1) &
)
speeds(ij) = sqrt(sum([(u(k)**2, k=1,nspacedims)]))
end associate
end do
end associate
do concurrent(i = 1:npositions-1, j = 1:ncells)
associate( &
u => (x(i+1,j,:) - x(i,j,:))/(history(i+1)%time - history(i)%time), &
ij => i + (j-1)*(npositions-1) &
)
speeds(ij) = sqrt(sum([(u(k)**2, k=1,nspacedims)]))
end associate
end do
end associate

end procedure
end subroutine

end submodule do_concurrent_s
end submodule do_concurrent_s
18 changes: 14 additions & 4 deletions src/matcha/output_s.f90
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
submodule(output_m) output_s
use do_concurrent_m, only : do_concurrent_k, do_concurrent_output_distribution, do_concurrent_speeds
use t_cell_collection_m, only : t_cell_collection_bind_C_t
use iso_c_binding, only : c_loc, c_double
use iso_c_binding, only : c_double
implicit none

contains
Expand All @@ -24,16 +24,26 @@

integer, parameter :: speed=1, freq=2 ! subscripts for speeds and frequencies

associate(npositions => size(self%history_))
allocate(speeds(self%my_num_cells()*(npositions-1)))
end associate
call do_concurrent_speeds(t_cell_collection_bind_C_t(self%history_), speeds)

associate(emp_distribution => self%input_%sample_distribution())
block
real(c_double), allocatable :: emp_distribution(:,:)

emp_distribution = self%input_%sample_distribution()
associate(nintervals => size(emp_distribution(:,1)), dvel_half => (emp_distribution(2,speed)-emp_distribution(1,speed))/2.d0)
vel = [emp_distribution(1,speed) - dvel_half, [(emp_distribution(i,speed) + dvel_half, i=1,nintervals)]]
if (allocated(k)) deallocate(k)
allocate(k(size(speeds)))
call do_concurrent_k(speeds, vel, k)
call do_concurrent_output_distribution(nintervals, speed, freq, emp_distribution, k, output_distribution)
if(allocated(output_distribution)) deallocate(output_distribution)
allocate(output_distribution(nintervals,2))
call do_concurrent_output_distribution(speed, freq, emp_distribution, k, output_distribution)
output_distribution(:,freq) = output_distribution(:,freq)/sum(output_distribution(:,freq))
end associate
end associate
end block

end procedure

Expand Down
Loading
Loading