fortran-lang · zerothi · Jul 2, 2021
diff --git a/poisson2d/README.md b/poisson2d/README.md
@@ -3,14 +3,48 @@
 This solver uses a simple Jacobi iteration to solve a Poisson equation. For details, see, for example,
 "Computational Physics" by Mark Newman, Chap 9. Compiler commands were:
 
-```gfortran sourcefile.f95 -Ofast -o program```
+```gfortran sourcefile.f95 -O3 -o program```
 
-```gcc sourcefile.c -Ofast -o program```
+```gcc sourcefile.c -O3 -o program```
 
 For Cython module (import it to run):
 
 ```python setup.py build_ext --inplace```
 
+
+There are various implementations using various strategies.
+All codes are named with a 1-1 correspondance, i.e. `c_i_*` has the same strategy as `fortran_i_*` files.
+The suffixed `_*` is only used if there are multiple alternatives that utilizes the same strategy between
+the different languages. For example the `c_4_1d` and `c_4_2d` are both using the same strategy for
+solving the Poisson equation, while the former uses a 1D array for the data while the latter uses a 2D
+array with some slight overhead of another pointer level.
+
+The two files `c_common.c` and `fortran_common.f90` are baseline programs used for printing
+out data and for parsing the command line arguments.
+
+All executables accept 2 arguments:
+
+    # N = 100
+    # N_ITER = 2000
+    ./c_1 100 2000
+    100             2000  5.658391137993336e+04  1.448613750000000e-03
+
+which will use a 2D array of size `100x100` and run for 2000 iterations.
+The output line consists of 4 values, 1) N, 2) N_ITER, 3) max-difference, 4) time per iteration.
+
+Some thoughts on the code at https://runningcrocodile.fi/articles/pythonperformance.html. Also, there was
+discussion about this problem at Discourse: https://fortran-lang.discourse.group/t/performance-c-vs-fortran/1461 
+with important contributions from numerous users (Mason, Beliavsky, septc, implicitall, nncarlson, han190 and pmk).
+
+The entire code base has been re-written to allow command-line arguments allowing to sample different
+matrix sizes with the same executable (user zerothi).
+
+To run a preset benchmark suite, simply execute `bash run.sh` which will run the currently
+implemented benchmarks with dimensions up to 600.
+
+
+## Old timings
+
 The grid is a 2-dimensional array of size MxM. The timings for different values of M are, in seconds:
 
 | Language            | M=100           | M=200                 | M=300                  |
@@ -25,7 +59,3 @@ The grid is a 2-dimensional array of size MxM. The timings for different values
 * For all of these results, the amount of iterations performed by the respective codes was approximately
 the same, with the exception of the 100x100 grid C codes, which did nearly a double amount of iterations
 compared to the rest, for some reason. The timings are on AMD Ryzen 5 3600 @3.6GHz, using WSL2 on Windows 10.
-
-Some thoughts on the code at https://runningcrocodile.fi/articles/pythonperformance.html . Also, there was
-discussion about this problem at Discourse: https://fortran-lang.discourse.group/t/performance-c-vs-fortran/1461 
-with important contributions from numerous users (Mason, Beliavsky, septc, implicitall, nncarlson, han190 and pmk)
diff --git a/poisson2d/c_1.c b/poisson2d/c_1.c
@@ -0,0 +1,110 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <time.h>
+#include "c_common.h"
+
+// COMMENT
+// I know, this is extremely bad practice.
+// But to accommodate argument sizes for the arguments
+// I felt this was ok.
+// It makes it much more versatile when testing
+// END COMMENT
+void swap(int M, double (*a)[M], double (*b)[M])
+{
+  double swp;
+  for (int i = 0; i < M; i++) {
+    for (int j = 0; j < M; j++) {
+      swp = a[i][j];
+      a[i][j] = b[i][j];
+      b[i][j] = swp;
+    }
+  }
+}
+
+// See comment above
+double mmax(int M, double (*phi)[M], double (*phip)[M]) {
+  double max = 0.0;
+  double diff = 0.0;
+  for (int i = 0; i < M; i++) {
+    for (int j = 0; j < M; j++) {
+      diff = fabs(phi[i][j]-phip[i][j]);
+      if (diff > max)
+        max = diff;
+    }
+  }
+  return max;
+}
+
+double rho(double x, double y) {
+  double s1 = 0.6;
+  double e1 = 0.8;
+  double s2 = 0.2;
+  double e2 = 0.4;
+
+  if (x > s1 && x < e1 && y > s1 && y < e1) {
+    return 1.0;
+  } else if (x > s2 && x < e2 && y > s2 && y < e2 ) {
+    return -1.0;
+  } else {
+    return 0.0;
+  }
+}
+
+clock_t run(int M, int N_ITER) {
+
+  // COMMENT:
+  // I wouldn't even imagine any novice in C would
+  // do something like this. It really makes no sense :)
+  // END OF COMMENT
+  double (*phi)[M];
+  double (*phip)[M]; 
+  double (*rhoa)[M]; 
+  phi = malloc(sizeof(double[M][M]));
+  phip = malloc(sizeof(double[M][M]));
+  rhoa = malloc(sizeof(double[M][M]));
+
+  clock_t tic = clock();
+
+  // initialize matrices
+  memset(phip, 0, sizeof(phip[0][0])*M*M);
+  memset(phi, 0, sizeof(phi[0][0])*M*M);
+
+  double delta = 1.0;
+  double a2 = pow(SPACE,2.0);
+
+  int iter = 0;
+  while (iter < N_ITER ) {
+    iter += 1;
+
+    for (int i=1; i < M-1; i++) {
+      for (int j=1; j < M-1; j++) {
+        phip[i][j] = (phi[i+1][j] + phi[i-1][j] +
+                      phi[i][j+1] + phi[i][j-1])/4.0 +
+	  a2/(4.0 * EPSILON0)*rho(i*SPACE,j*SPACE);
+      }
+    }
+    delta = mmax(M, phi, phip);
+    swap(M, phi, phip);
+
+  }
+
+  clock_t toc = clock();
+
+  free(phi);
+  free(phip);
+  free(rhoa);
+
+  write_timing(M, iter, delta, toc - tic);
+}
+
+
+int main(int argc, char *argv[]) {
+
+  int N, N_ITER;
+
+  // Retrieve arguments
+  parse_args(argc, argv, &N, &N_ITER);
+  run(N, N_ITER);
+}
diff --git a/poisson2d/c_2.c b/poisson2d/c_2.c
@@ -0,0 +1,98 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <time.h>
+#include "c_common.h"
+
+// COMMENT
+// I know, this is extremely bad practice.
+// But to accommodate argument sizes for the arguments
+// I felt this was ok.
+// It makes it much more versatile when testing
+// END COMMENT
+void swap(int M, double a[M][M], double b[M][M]) {
+  double swp;
+  for (int i = 0; i < M; i++) {
+    for (int j = 0; j < M; j++) {
+      swp = a[i][j];
+      a[i][j] = b[i][j];
+      b[i][j] = swp;
+    }
+  }
+}
+
+// See comment above
+double mmax(int M, double a[M][M], double b[M][M]) {
+  double max = 0.0;
+  double diff = 0.0;
+  for (int i = 0; i < M; i++) {
+    for (int j = 0; j < M; j++) {
+      diff = fabs(a[i][j]-b[i][j]);
+      if (diff > max)
+        max = diff;
+    }
+  }
+  return max;
+}
+
+double rho(double x, double y) {
+  double s1 = 0.6;
+  double e1 = 0.8;
+  double s2 = 0.2;
+  double e2 = 0.4;
+
+  if (x > s1 && x < e1 && y > s1 && y < e1) {
+    return 1.0;
+  } else if (x > s2 && x < e2 && y > s2 && y < e2 ) {
+    return -1.0;
+  } else {
+    return 0.0;
+  }
+}
+
+void run(int M, int N_ITER) {
+
+  double phi[M][M];
+  double phip[M][M]; 
+
+  clock_t tic = clock();
+  int iter = 0;
+
+  // initialize matrices
+  memset(&phip[0][0], 0, sizeof(double)*M*M);
+  memset(&phi[0][0], 0, sizeof(double)*M*M);
+
+  double delta;
+  double a2 = pow(SPACE,2.0);
+
+  while (iter < N_ITER ) {
+    iter += 1;
+
+    for (int i=1; i < M-1; i++) {
+      for (int j=1; j < M-1; j++) {
+        phip[i][j] = (phi[i+1][j] + phi[i-1][j] +
+                      phi[i][j+1] + phi[i][j-1])/4.0 +
+	  a2/(4.0 * EPSILON0)*rho(i*SPACE,j*SPACE);
+      }
+    }
+    delta = mmax(M, phi, phip);
+    swap(M, phi, phip);
+
+  }
+
+  clock_t toc = clock();
+
+  write_timing(M, iter, delta, toc - tic);
+}
+
+
+int main(int argc, char *argv[]) {
+
+  int N, N_ITER;
+
+  // Retrieve arguments
+  parse_args(argc, argv, &N, &N_ITER);
+
+  run(N, N_ITER);
+}
diff --git a/poisson2d/c_3.c b/poisson2d/c_3.c
@@ -0,0 +1,104 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <time.h>
+#include "c_common.h"
+
+// COMMENT
+// I know, this is extremely bad practice.
+// But to accommodate argument sizes for the arguments
+// I felt this was ok.
+// It makes it much more versatile when testing
+// END COMMENT
+void swap(int M, double a[M][M], double b[M][M]) {
+  double swp;
+  for (int i = 0; i < M; i++) {
+    for (int j = 0; j < M; j++) {
+      swp = a[i][j];
+      a[i][j] = b[i][j];
+      b[i][j] = swp;
+    }
+  }
+}
+
+// See comment above
+double mmax(int M, double a[M][M], double b[M][M]) {
+  double max = 0.0;
+  double diff = 0.0;
+  for (int i = 0; i < M; i++) {
+    for (int j = 0; j < M; j++) {
+      diff = fabs(a[i][j]-b[i][j]);
+      if (diff > max)
+        max = diff;
+    }
+  }
+  return max;
+}
+
+double rho(double x, double y) {
+  double s1 = 0.6;
+  double e1 = 0.8;
+  double s2 = 0.2;
+  double e2 = 0.4;
+
+  if (x > s1 && x < e1 && y > s1 && y < e1) {
+    return 1.0;
+  } else if (x > s2 && x < e2 && y > s2 && y < e2 ) {
+    return -1.0;
+  } else {
+    return 0.0;
+  }
+}
+
+void run(int M, int N_ITER) {
+
+  double phi[M][M];
+  double phip[M][M]; 
+  double rhoa[M][M]; 
+
+  clock_t tic = clock();
+  int iter = 0;
+
+  // initialize matrices
+  memset(&phip[0][0], 0, sizeof(double)*M*M);
+  memset(&phi[0][0], 0, sizeof(double)*M*M);
+  for (int i=1; i<M-1; i++)  {
+    for (int j=1; j<M-1; j++) {
+      rhoa[i][j] =  rho(i*SPACE,j*SPACE);
+    }
+  }
+
+  double delta;
+  double a2 = pow(SPACE,2.0);
+
+  while (iter < N_ITER ) {
+    iter += 1;
+
+    for (int i=1; i < M-1; i++) {
+      for (int j=1; j < M-1; j++) {
+        phip[i][j] = (phi[i+1][j] + phi[i-1][j] +
+                      phi[i][j+1] + phi[i][j-1])/4.0 +
+	  a2/(4.0 * EPSILON0)*rhoa[i][j];
+      }
+    }
+    delta = mmax(M, phi, phip);
+    swap(M, phi, phip);
+
+  }
+
+  clock_t toc = clock();
+
+  write_timing(M, iter, delta, toc - tic);
+}
+
+
+int main(int argc, char *argv[]) {
+
+  int N, N_ITER;
+
+  // Retrieve arguments
+  parse_args(argc, argv, &N, &N_ITER);
+
+  run(N, N_ITER);
+}