From 5184dda5a196614b1025d058e58214db12a98807 Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Fri, 2 Jul 2021 19:02:10 +0200 Subject: [PATCH] mnt: added optimize1d.c Also benchmarks should not rely on convergence, rather a fixed number of iterations is more appropriate. The problem of inconsistent iterations between the C and Fortran versions was also fixed because of an indexing error when calculating the cell coordinates (C is 0-based, F is 1-based). So they now run the same number of iterations, regardless of M. Moved timings about to make them more comparable. Ensured that all examples now use allocate/malloc which is much more realistic. Even though it does not have the same performance it is much more appropriate for benchmark cases. Lastly, made calls more similar, C and F both call a function for the iteration step. --- poisson2d/README.md | 2 + poisson2d/optimized.f90 | 96 ++++++++++---------- poisson2d/optimized1d.c | 106 +++++++++++++++++++++++ poisson2d/{optimized.c => optimized2d.c} | 12 ++- 4 files changed, 168 insertions(+), 48 deletions(-) create mode 100644 poisson2d/optimized1d.c rename poisson2d/{optimized.c => optimized2d.c} (95%) diff --git a/poisson2d/README.md b/poisson2d/README.md index 14ae309..82915a8 100644 --- a/poisson2d/README.md +++ b/poisson2d/README.md @@ -29,3 +29,5 @@ compared to the rest, for some reason. The timings are on AMD Ryzen 5 3600 @3.6G Some thoughts on the code at https://runningcrocodile.fi/articles/pythonperformance.html . Also, there was discussion about this problem at Discourse: https://fortran-lang.discourse.group/t/performance-c-vs-fortran/1461 with important contributions from numerous users (Mason, Beliavsky, septc, implicitall, nncarlson, han190 and pmk) + +The codes were rewritten for more realistic cases by zerothi. diff --git a/poisson2d/optimized.f90 b/poisson2d/optimized.f90 index 3b9d997..536ed00 100644 --- a/poisson2d/optimized.f90 +++ b/poisson2d/optimized.f90 @@ -1,44 +1,26 @@ -module rhofunc - implicit none - public - integer, parameter :: dp=kind(0.d0) - -contains - - pure real(dp) function rho(x,y) - real(dp), intent(in) :: x,y - if ( 0.6_dp < x .and. x < 0.8_dp .and. 0.6_dp < y .and. y < 0.8_dp ) then - rho = 1.0_dp - else if ( 0.2_dp < x .and. x < 0.4_dp .and. 0.2_dp < y .and. y < 0.4_dp ) then - rho = -1.0_dp - else - rho = 0.0_dp - end if - end function rho - -end module rhofunc - program poisson - use rhofunc, only: rho + implicit none + + integer, parameter :: M = 1000 integer, parameter :: dp=kind(0.d0) - integer, parameter :: M = 300 + integer, parameter :: N_ITER = 10000 real(dp),parameter :: target=1E-1_dp real(dp),parameter :: a=0.01_dp - - integer :: i,j, iter real(dp),parameter :: epsilon0=8.85E-12_dp - real(dp) :: delta, b, e, a2 + + integer :: i,j, iter + real(dp) :: delta, b, e, a2 real(dp), allocatable :: rhoarr(:,:) real(dp), allocatable, target :: phiprime(:,:), phi(:,:) - real(dp), pointer :: p(:,:), pnew(:,:) + real(dp), pointer :: p(:,:), pnew(:,:), tmp(:,:) + + call cpu_time(b) allocate(phiprime(M,M), phi(M,M)) allocate(rhoarr(M,M)) - call cpu_time(b) - ! Fortran doesn't care too much about pow ! since it figures out if the exponent is an integer or not a2 = a*a / epsilon0 @@ -48,21 +30,43 @@ program poisson end do end do - ! We only need to initialize 1 array phi(:,:) = 0.0_dp + phiprime(:,:) = 0.0_dp + p => phi + pnew => phiprime iter = 0 delta = target + 1._dp - do while ( delta > target ) + do while ( iter < N_ITER ) iter = iter + 1 + call iterate(rhoarr, p, pnew, delta) + ! Easer to swap pointer than copying stuff around - if ( mod(iter, 2) == 1 ) then - p => phi - pnew => phiprime - else - p => phiprime - pnew => phi - end if + ! In fortran we could even use pointers + ! But we can just call + tmp => p + p => pnew + pnew => tmp + + end do + + deallocate(phi, phiprime, rhoarr) + + call cpu_time(e) + + write(*,'(a)') 'fortran version' + write(*,'(a,f20.10)') 'delta = ',delta + write(*,'(a,i0)') 'Iterations = ',iter + write(*,'(a,f20.10)') 'Time = ',e - b + +contains + + subroutine iterate(rhoarr, p, pnew, delta) + real(dp), intent(in) :: p(:,:), rhoarr(:,:) + real(dp), intent(out) :: pnew(:,:) + real(dp), intent(out) :: delta + + integer :: i, j delta = 0._dp do j=2, M-1 @@ -72,13 +76,17 @@ program poisson end do end do - end do - - call cpu_time(e) + end subroutine iterate - deallocate(phi, phiprime, rhoarr) - - write(*,'(a,i0)') 'Iterations = ',iter - write(*,'(a,f20.10)') 'Time = ',e - b + pure real(dp) function rho(x,y) + real(dp), intent(in) :: x,y + if ( 0.6_dp < x .and. x < 0.8_dp .and. 0.6_dp < y .and. y < 0.8_dp ) then + rho = 1.0_dp + else if ( 0.2_dp < x .and. x < 0.4_dp .and. 0.2_dp < y .and. y < 0.4_dp ) then + rho = -1.0_dp + else + rho = 0.0_dp + end if + end function rho end program poisson diff --git a/poisson2d/optimized1d.c b/poisson2d/optimized1d.c new file mode 100644 index 0000000..adbb7c9 --- /dev/null +++ b/poisson2d/optimized1d.c @@ -0,0 +1,106 @@ +#include +#include +#include +#include +#include +#define M 1000 +#define N_ITER 10000 + +#define IDX(i,j) (i)*M+j + + +double rho(const double x, const double y) { + const double s1 = 0.6; + const double e1 = 0.8; + const double s2 = 0.2; + const double e2 = 0.4; + + if (s1 < x && x < e1 && s1 < y && y < e1) { + return 1.0; + } else if ( s2 < x && x < e2 && s2 < y && y < e2 ) { + return -1.0; + } else { + return 0.0; + } +} + +double iterate(double *restrict phi, double *restrict phinew, double *restrict rhoa) { + double delta = 0, err; + for (int i=1; i < M-1; i++) { + for (int j=1; j < M-1; j++) { + phinew[IDX(i,j)] = (phi[IDX(i+1,j)] + phi[IDX(i-1,j)] + phi[IDX(i,j+1)] + phi[IDX(i,j-1)] + rhoa[IDX(i,j)])*0.25; + err = fabs(phinew[IDX(i,j)] - phi[IDX(i,j)]); + if ( err > delta ) delta = err; + } + } + return delta; +} + +void init_rho(double *restrict rhoa, const double epsilon, const double a) { + const double a2 = a * a / epsilon; + for (int i=1; i #include #include -#define M 300 +#define M 1000 +#define N_ITER 10000 double ** malloc_2d(int m, int n) { @@ -99,7 +100,7 @@ void run(double toler, double a) int iter = 0; double delta; - do { + while ( iter < N_ITER ) { iter += 1; delta = iterate(phi, phip, rhoa); @@ -108,20 +109,23 @@ void run(double toler, double a) tmp = phi; phi = phip; phip = tmp; - } while ( delta > toler ); + } free_2d(phi); free_2d(phip); free_2d(rhoa); + printf("delta = %20.10f\n",delta); printf("Iterations = %d\n", iter); } int main(int argc, char *argv[]) { - clock_t start = clock(); const double target = 1e-1; const double a = 0.01; + + clock_t start = clock(); + printf("c version [2d]\n"); run(target, a); clock_t end = clock(); double total = ((double)(end - start)) / CLOCKS_PER_SEC;