From 091a8afedf01efd32e0765babff09776a3f8ac41 Mon Sep 17 00:00:00 2001 From: Martin Robinson Date: Mon, 16 Dec 2024 12:37:37 +0000 Subject: [PATCH] docs: update sundials bench, new python bench (#116) * docs: update sundials bench, new python bench * misc updates * misc fixes --- benches/ode_solvers.rs | 6 +- benches/plot.py | 6 +- book/src/SUMMARY.md | 4 +- book/src/benchmarks/benchmarks.md | 10 + .../images}/bench_bdf.svg | 172 +-- .../images}/bench_bdf_diffsl.svg | 170 +- .../images}/bench_tr_bdf2_esdirk.svg | 284 ++-- book/src/benchmarks/images/python_plot.svg | 1367 +++++++++++++++++ book/src/benchmarks/python.md | 38 + book/src/benchmarks/python_plot.py | 28 + book/src/benchmarks/python_results.csv | 25 + .../{benchmarks.md => benchmarks/sundials.md} | 29 +- 12 files changed, 1772 insertions(+), 367 deletions(-) create mode 100644 book/src/benchmarks/benchmarks.md rename book/src/{images/benchmarks => benchmarks/images}/bench_bdf.svg (88%) rename book/src/{images/benchmarks => benchmarks/images}/bench_bdf_diffsl.svg (88%) rename book/src/{images/benchmarks => benchmarks/images}/bench_tr_bdf2_esdirk.svg (88%) create mode 100644 book/src/benchmarks/images/python_plot.svg create mode 100644 book/src/benchmarks/python.md create mode 100644 book/src/benchmarks/python_plot.py create mode 100644 book/src/benchmarks/python_results.csv rename book/src/{benchmarks.md => benchmarks/sundials.md} (81%) diff --git a/benches/ode_solvers.rs b/benches/ode_solvers.rs index b8cd9e2..b2d5606 100644 --- a/benches/ode_solvers.rs +++ b/benches/ode_solvers.rs @@ -222,8 +222,8 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function(stringify!($name), |b| { use diffsol::ode_solver::test_models::robertson::*; use diffsol::LlvmModule; + let (problem, soln) = robertson_diffsl_problem::<$matrix, LlvmModule>(); b.iter(|| { - let (problem, soln) = robertson_diffsl_problem::<$matrix, LlvmModule>(); benchmarks::$solver::<_, $linear_solver<_>>( &problem, soln.solution_points.last().unwrap().t, @@ -426,8 +426,8 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function(concat!(stringify!($name), "_", $N), |b| { use diffsol::ode_solver::test_models::heat2d::*; use diffsol::LlvmModule; + let (problem, soln) = heat2d_diffsl_problem::<$matrix, LlvmModule, $N>(); b.iter(|| { - let (problem, soln) = heat2d_diffsl_problem::<$matrix, LlvmModule, $N>(); benchmarks::$solver::<_, $linear_solver<_>>(&problem, soln.solution_points.last().unwrap().t) }) });)+ @@ -500,8 +500,8 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function(concat!(stringify!($name), "_", $N), |b| { use diffsol::ode_solver::test_models::foodweb::*; use diffsol::LlvmModule; + let (problem, soln) = foodweb_diffsl_problem::<$matrix, LlvmModule, $N>(); b.iter(|| { - let (problem, soln) = foodweb_diffsl_problem::<$matrix, LlvmModule, $N>(); benchmarks::$solver::<_, $linear_solver<_>>(&problem, soln.solution_points.last().unwrap().t) }) });)+ diff --git a/benches/plot.py b/benches/plot.py index 2b751f9..7ce3d46 100644 --- a/benches/plot.py +++ b/benches/plot.py @@ -11,7 +11,7 @@ "name": "robertson", "reference_name": "roberts_dns", "arg": None, - "solvers": ["nalgebra_esdirk34", "nalgebra_tr_bdf2", "nalgebra_bdf", "nalgebra_bdf_diffsl"], + "solvers": ["nalgebra_esdirk34", "nalgebra_tr_bdf2", "nalgebra_bdf", "nalgebra_bdf_diffsl"] }, { "name": "robertson_ode", @@ -24,7 +24,7 @@ "name": "heat2d", "reference_name": "heat2d_klu", "arg": [5, 10, 20, 30], - "solvers": ["faer_sparse_esdirk", "faer_sparse_tr_bdf2", "faer_sparse_bdf", "faer_sparse_bdf", "faer_sparse_bdf_diffsl"] + "solvers": ["faer_sparse_esdirk", "faer_sparse_tr_bdf2", "faer_sparse_bdf", "faer_sparse_bdf_diffsl"] #"solvers": ["faer_sparse_esdirk_klu", "faer_sparse_tr_bdf2_klu", "faer_sparse_bdf_klu", "faer_sparse_bdf_klu_diffsl"] }, { @@ -115,7 +115,7 @@ ax3.set_title("BDF solver") ax4.set_title("BDF solver + DiffSL") -basedir = "book/src/images/benchmarks" +basedir = "book/src/benchmarks/images" fig1.savefig(f"{basedir}/bench_tr_bdf2_esdirk.svg") fig2.savefig(f"{basedir}/bench_bdf.svg") fig3.savefig(f"{basedir}/bench_bdf_diffsl.svg") diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md index c4dde42..2a0c1c4 100644 --- a/book/src/SUMMARY.md +++ b/book/src/SUMMARY.md @@ -26,4 +26,6 @@ - [Sparse problems](./specify/sparse_problems.md) - [Choosing a solver](./choosing_a_solver.md) - [Solving the problem](./solving_the_problem.md) -- [Benchmarks](./benchmarks.md) +- [Benchmarks](./benchmarks/benchmarks.md) + - [Sundials](./benchmarks/sundials.md) + - [Python (Diffrax & Casadi)](./benchmarks/python.md) diff --git a/book/src/benchmarks/benchmarks.md b/book/src/benchmarks/benchmarks.md new file mode 100644 index 0000000..9700e47 --- /dev/null +++ b/book/src/benchmarks/benchmarks.md @@ -0,0 +1,10 @@ +# Benchmarks + +The goal of this chapter is to compare the performance of the DiffSol implementation with other similar ode solver libraries. + +The libraries we will compare against are: +- [Sundials](https://computing.llnl.gov/projects/sundials): A suite of advanced numerical solvers for ODEs and DAEs, implemented in C. +- [Diffrax](https://docs.kidger.site/diffrax/): A Python library for solving ODEs and SDEs implemented using JAX. +- [Casadi](https://web.casadi.org/): A C++ library with Python anbd MATLAB bindings, for solving ODEs and DAEs, nonlinear optimisation and algorithmic differentiation. + +The comparison with Sundials will be done in Rust by wrapping C functions and comparing them to Rust implementations. The comparsion with the Python libraries (Diffrax and Casadi) will be done by wrapping DiffSol in Python using the PyO3 library, and comparing the performance of the wrapped functions. As well as benchmarking the DiffSol solvers, this also serves [as an example](https://github.com/martinjrobins/diffsol_python_benchmark) of how to wrap and use DiffSol in other languages. diff --git a/book/src/images/benchmarks/bench_bdf.svg b/book/src/benchmarks/images/bench_bdf.svg similarity index 88% rename from book/src/images/benchmarks/bench_bdf.svg rename to book/src/benchmarks/images/bench_bdf.svg index f8ea92a..8b4f842 100644 --- a/book/src/images/benchmarks/bench_bdf.svg +++ b/book/src/benchmarks/images/bench_bdf.svg @@ -6,7 +6,7 @@ - 2024-08-03T12:43:32.756370 + 2024-12-16T10:15:44.650748 image/svg+xml @@ -41,12 +41,12 @@ z - - + @@ -86,7 +86,7 @@ z - + @@ -137,7 +137,7 @@ z - + @@ -151,7 +151,7 @@ z - + @@ -191,7 +191,7 @@ z - + @@ -205,7 +205,7 @@ z - + @@ -692,12 +692,12 @@ z - - + @@ -722,7 +722,7 @@ z - + @@ -737,7 +737,7 @@ z - + @@ -752,117 +752,117 @@ z - - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -983,30 +983,30 @@ z - + - + - + - + - - - + - - - - - - + + @@ -1219,21 +1203,17 @@ z - - - - - - - + + - - - - - - - + + - - - - - + diff --git a/book/src/images/benchmarks/bench_bdf_diffsl.svg b/book/src/benchmarks/images/bench_bdf_diffsl.svg similarity index 88% rename from book/src/images/benchmarks/bench_bdf_diffsl.svg rename to book/src/benchmarks/images/bench_bdf_diffsl.svg index 49ff684..8334fbc 100644 --- a/book/src/images/benchmarks/bench_bdf_diffsl.svg +++ b/book/src/benchmarks/images/bench_bdf_diffsl.svg @@ -6,7 +6,7 @@ - 2024-08-03T12:43:32.826327 + 2024-12-16T10:15:44.720834 image/svg+xml @@ -41,12 +41,12 @@ z - - + @@ -86,7 +86,7 @@ z - + @@ -137,7 +137,7 @@ z - + @@ -151,7 +151,7 @@ z - + @@ -191,7 +191,7 @@ z - + @@ -205,7 +205,7 @@ z - + @@ -692,12 +692,12 @@ z - - + @@ -722,7 +722,7 @@ z - + @@ -737,7 +737,7 @@ z - + @@ -752,117 +752,117 @@ z - - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -983,23 +983,23 @@ z - + - + - + - - - + - - - + + - @@ -1296,27 +1282,23 @@ z - - - - - - - - - - + + + + + + - - - + + - - - - - - - - - - + + + + + + - + diff --git a/book/src/images/benchmarks/bench_tr_bdf2_esdirk.svg b/book/src/benchmarks/images/bench_tr_bdf2_esdirk.svg similarity index 88% rename from book/src/images/benchmarks/bench_tr_bdf2_esdirk.svg rename to book/src/benchmarks/images/bench_tr_bdf2_esdirk.svg index 19decac..0857434 100644 --- a/book/src/images/benchmarks/bench_tr_bdf2_esdirk.svg +++ b/book/src/benchmarks/images/bench_tr_bdf2_esdirk.svg @@ -6,7 +6,7 @@ - 2024-08-03T12:43:32.579115 + 2024-12-16T10:15:44.479613 image/svg+xml @@ -41,12 +41,12 @@ z - - + @@ -86,7 +86,7 @@ z - + @@ -137,7 +137,7 @@ z - + @@ -151,7 +151,7 @@ z - + @@ -191,7 +191,7 @@ z - + @@ -205,7 +205,7 @@ z - + @@ -692,12 +692,12 @@ z - - + @@ -722,7 +722,7 @@ z - + @@ -737,7 +737,7 @@ z - + @@ -752,117 +752,117 @@ z - - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -983,23 +983,23 @@ z - + - + - + - - - + - - - + + - @@ -1271,21 +1257,17 @@ z - - - - - - - + + - - - - @@ -1352,7 +1330,7 @@ z - + @@ -1365,7 +1343,7 @@ z - + @@ -1379,7 +1357,7 @@ z - + @@ -1393,7 +1371,7 @@ z - + @@ -1407,7 +1385,7 @@ z - + @@ -1421,7 +1399,7 @@ z - + @@ -1480,133 +1458,133 @@ z - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -1642,23 +1620,23 @@ z - + - + - + - - - + + - - - + + @@ -1874,21 +1866,17 @@ L 599.445312 62.614688 - - - - - - - + + @@ -1915,20 +1903,16 @@ L 599.445312 77.570938 - - - - - + - + diff --git a/book/src/benchmarks/images/python_plot.svg b/book/src/benchmarks/images/python_plot.svg new file mode 100644 index 0000000..0389ce9 --- /dev/null +++ b/book/src/benchmarks/images/python_plot.svg @@ -0,0 +1,1367 @@ + + + + + + + + 2024-12-16T10:58:24.936784 + image/svg+xml + + + Matplotlib v3.9.0, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/book/src/benchmarks/python.md b/book/src/benchmarks/python.md new file mode 100644 index 0000000..250ec76 --- /dev/null +++ b/book/src/benchmarks/python.md @@ -0,0 +1,38 @@ +# Python (Diffrax & Casadi) + +[Diffrax](https://docs.kidger.site/diffrax/) is a Python library for solving ODEs and SDEs implemented using JAX. [Casadi](https://web.casadi.org/) is a C++ library with Python and MATLAB bindings, for solving ODEs and DAEs, nonlinear optimisation and algorithmic differentiation. In this benchmark we compare the performance of the DiffSol implementation with the Diffrax and Casadi libraries. + +As well as demonstrating the performance of the DiffSol library, this benchmark also serves as an example of how to wrap and use DiffSol in other languages. The code for this benchmark can be found [here](https://github.com/martinjrobins/diffsol_python_benchmark). The [maturin](https://www.maturin.rs/) library was used to generate a template for the Python bindings and the CI/CD pipeline neccessary to build the bindings, run pytest tests and build the wheels ready for distribution on PyPI. The [pyo3](https://github.com/PyO3/pyo3) library was used to wrap the DiffSol library in Python. + +## Problem setup + +We will use the `robertson_ode` problem for this benchmark. This is a stiff ODE system with 3 equations and 3 unknowns, and is a common benchmark problem for ODE solvers. To illustrate the performance over a range of problem sizes we duplicated the equations by a factor of `ngroups`, so the number of equations is `3 * ngroups`. + +For the Diffrax implementation we based this on the [example](https://docs.kidger.site/diffrax/examples/stiff_ode/) from the Diffrax documentation, extending this to include the `ngroups` parameter. As with the example, we used the `Kvaerno5` method for the solver. You can see the final implementation of the model [here](https://github.com/martinjrobins/diffsol_python_benchmark/blob/main/diffsol_python_benchmark/diffrax_models.py). + +For the Casadi implementation we wrote this from scratch using the libraries Python API. You can see the final implementation of the model [here](https://github.com/martinjrobins/diffsol_python_benchmark/blob/main/diffsol_python_benchmark/casadi_models.py). + +The DiffSol implementation of the model was done using the DiffSL language, and you can see the final implementation of the model [here](https://github.com/martinjrobins/diffsol_python_benchmark/blob/main/diffsol_python_benchmark/diffsol_models.py). + +The final implementation of the benchmark using these models is done [here](https://github.com/martinjrobins/diffsol_python_benchmark/blob/main/bench/bench.py). The DiffSol benchmark is done using the `bdf` solver. For `ngroup` < 20 it uses the `nalgebra` dense matrix and LU solver, and for `ngroups` >= 20 the `faer` sparse matrix and LU solver. + +## Differences between implementations + +There are a number of differences between the Diffrax, Casadi and DiffSol implementations that may affect the performance of the solvers. The main differences are: +- The Casadi implementation uses sparse matrices, whereas the DiffSol implementation uses dense matrices for `ngroups` < 20, and sparse matrices for `ngroups` >= 20. This will provide an advantage for DiffSol for smaller problems. +- I'm unsure if the Diffrax implementation uses sparse or dense matrices, but it is most likely dense, as JAX only has experimental support for sparse matrices. This will provide an advantage for DiffSol for larger problems. +- The Diffrax implementation uses the `Kvaerno5` method, which is a 5th order implicit Runge-Kutta method. This is different from the BDF method used by both the Casadi and DiffSol implementations. +- Each library was allowed to use multiple threads according to their default settings. The only part of the DiffSol implementation that takes advantage of multiple threads is the `faer` sparse LU solver and matrix. Both the `nalgebra` LU solver / matrix, and the DiffSL generated code are single-threaded only. Diffrax uses JAX, which takes advantage of multiple threads (CPU only, no GPUs were used in these benchmarks). The Casadi implementation also uses multiple threads, but I'm unsure of the details. + + +## Results + +The benchmarks were run on a Dell PowerEdge R7525 2U rack server, with dual AMD EPYC 7343 3.2Ghz 16C CPU and 128GB Memory. Each benchmark was run using both a low (1e-8) and high (1e-4) tolerances for both `rtol` and `atol`, and with `ngroup` ranging between 1 - 10,000. The results are presented in the following graphs, where the x-axis is the size of the problem `ngroup` and the y-axis is the time taken to solve the problem relative to the time taken by the DiffSol implementation (so `10^0` is the same time as DiffSol, `10^1` is 10 times slower etc.) + +![Python](./images/python_plot.svg) + +DiffSol is faster than both the Casadi and Diffrax implementations over the range of problem sizes and tolerances tested, although the Casadi and DiffSol implementations converge to be similar for larger problems (`ngroups` > 100). + +The region that DiffSol really outperforms the other implementations is for smaller problems (`ngroups` < 5), at `ngroups` = 1, Casadi and Diffrax are between 3 - 40 times slower than DiffSol. This small size region are where the dense matrix and solver used is more appropriate for the problem, and the overhead of the other libraries is more significant. The Casadi library needs to traverse a graph of operations to calculate each rhs or jacobian evaluation, whereas the DiffSL JIT compiler will compile to native code using the LLVM backend, along with low-level optimisations that are not available to Casadi. Diffrax as well is significantly slower than DiffSol for smaller problems, and this might be due to (a) Diffrax being a ML library and not optimised for solving stiff ODEs, and (b) double precision is used, which again is not a common use case for ML libraries. + +As the problem sizes get larger, the performance of Diffrax and Casadi improve rapidly relative to DiffSol, but after `ngroups` > 10 the performance of Diffrax drops off again, probably due to JAX not taking advantage of the sparse structure of the problem. The performance of Casadi continues to improve, and for `ngroups` > 100 it is comparable to DiffSol. By the time `ngroups` = 10,000, the performance of Casadi is identical to DiffSol. \ No newline at end of file diff --git a/book/src/benchmarks/python_plot.py b/book/src/benchmarks/python_plot.py new file mode 100644 index 0000000..597ab52 --- /dev/null +++ b/book/src/benchmarks/python_plot.py @@ -0,0 +1,28 @@ +import pandas as pd +import matplotlib.pyplot as plt + +# load python_results.csv file +df = pd.read_csv('book/src/benchmarks/python_results.csv') +diffrax_low_tol = df[df['tol'] == 1e-4]['diffrax_time'] +casadi_low_tol = df[df['tol'] == 1e-4]['casadi_time'] +diffsol_low_tol = df[df['tol'] == 1e-4]['diffsol_time'] +ngroups_low_tol = df[df['tol'] == 1e-4]['ngroups'] +diffrax_high_tol = df[df['tol'] == 1e-8]['diffrax_time'] +casadi_high_tol = df[df['tol'] == 1e-8]['casadi_time'] +diffsol_high_tol = df[df['tol'] == 1e-8]['diffsol_time'] +ngroups_high_tol = df[df['tol'] == 1e-8]['ngroups'] + +# plot the results from diffrax and casadi scaled by the reference (diffsol) +fig, ax = plt.subplots() +ax.plot(ngroups_low_tol, diffrax_low_tol / diffsol_low_tol, label='diffrax (tol=1e-4)') +ax.plot(ngroups_low_tol, casadi_low_tol / diffsol_low_tol, label='casadi (tol=1e-4)') +ax.plot(ngroups_low_tol, diffrax_high_tol / diffsol_high_tol, label='diffrax (tol=1e-8)') +ax.plot(ngroups_low_tol, casadi_high_tol / diffsol_high_tol, label='casadi (tol=1e-8)') +ax.legend() +ax.set_yscale('log') +ax.set_xscale('log') +ax.set_xlabel('ngroups') +ax.set_ylabel('Time relative to diffsol') + +plt.savefig('python_plot.png') +plt.savefig('book/src/benchmarks/images/python_plot.svg') diff --git a/book/src/benchmarks/python_results.csv b/book/src/benchmarks/python_results.csv new file mode 100644 index 0000000..fc7100e --- /dev/null +++ b/book/src/benchmarks/python_results.csv @@ -0,0 +1,25 @@ +ngroups,tol,casadi_time,diffsol_time,diffrax_time +1,0.0001,0.0011767226459924133,3.1152486801147464e-05,0.0003240704019553959 +1,1e-08,0.0018366031849291176,8.334216196089983e-05,0.00031373293697834013 +10,0.0001,0.001293827251996845,0.00019592561409808695,0.00045331977191381156 +10,1e-08,0.002042409308953211,0.00042948652803897856,0.0004587201240938157 +20,0.0001,0.0014396793539635836,0.0003247480639256537,0.0011893865989986807 +20,1e-08,0.0022794388150796296,0.0006490132620092481,0.0009902620629873126 +40,0.0001,0.0017169011461082846,0.0005423122260253876,0.003972686865832657 +40,1e-08,0.002716647819848731,0.001075255556963384,0.0028768490890506656 +70,0.0001,0.0020806838658172636,0.000872906104195863,0.016255469823023304 +70,1e-08,0.0032871941849589347,0.001750042901840061,0.015982740553095936 +100,0.0001,0.002430817337706685,0.0012063957899808883,0.032641630642116067 +100,1e-08,0.0038433158202096818,0.0023834101222455504,0.024714520414359866 +200,0.0001,0.0035816779760112455,0.0028888281384432638, +200,1e-08,0.006066468168139548,0.005082464570671812, +400,0.0001,0.006550437295809388,0.004779179625911638, +400,1e-08,0.010515550004784017,0.00890555749530904, +700,0.0001,0.009862706633284687,0.008093493856489659, +700,1e-08,0.020905138103291393,0.01849150845594704, +1000,0.0001,0.017060047758018806,0.012448280966944164, +1000,1e-08,0.024620620634717247,0.023281645935235754, +5000,0.0001,0.07811664411258933,0.07020582679021907, +5000,1e-08,0.12322642290229469,0.13951222289745746, +10000,0.0001,0.1340123851162692,0.13100541778840125, +10000,1e-08,0.238225372430558,0.2701200796549933, diff --git a/book/src/benchmarks.md b/book/src/benchmarks/sundials.md similarity index 81% rename from book/src/benchmarks.md rename to book/src/benchmarks/sundials.md index 136d16d..175bb3f 100644 --- a/book/src/benchmarks.md +++ b/book/src/benchmarks/sundials.md @@ -1,10 +1,6 @@ -# Benchmarks +# Sundials Benchmarks -The goal of this chapter is to compare the performance of the DiffSol -implementation with other similar ode solver libraries. To begin with we have -focused on comparing against the popular -[Sundials](https://github.com/LLNL/sundials) solver suite, developed by the -Lawrence Livermore National Laboratory. +To begin with we have focused on comparing against the popular [Sundials](https://github.com/LLNL/sundials) solver suite, developed by the Lawrence Livermore National Laboratory. ## Test Problems To choose the test problems we have used several of the examples provided in the Sundials library. The problems are: @@ -48,7 +44,7 @@ entire model by hand. ## Results -These results were generated using DiffSol v0.2.1. +These results were generated using DiffSol v0.5.1, and were run on a Dell PowerEdge R7525 2U rack server, with dual AMD EPYC 7343 3.2Ghz 16C CPU and 128GB Memory. The performance of each implementation was timed and includes all setup and solve time. The exception to this is for the DiffSl implementations, where the JIT compilation for the model was not included in the timings (since the compilation time for the C and Rust code was also not included). @@ -68,20 +64,25 @@ For the small, dense, stiff `robertson` problem the DiffSol implementation is ve For the sparse `heat2d` problem the DiffSol implementation is slower than Sundials for smaller problems (about 2) but the performance improves significantly for larger problems until it is at about 0.3. Since this improvement for larger systems is not seen in `foodweb` or `robertson_ode` problems, it is likely due to the fact that the `heat2d` problem has a constant jacobian matrix and the DiffSol implementation has an advantage in this case. -For the `foodweb` problem the DiffSol implementation is quite close to IDA for all sizes. -It is again slower for small systems (about 1.5) and the performance improves for medium systems until it reaches a value of 0.7, but then performance starts to slowly decrease for larger systems until it is about 1.0 +For the `foodweb` problem the DiffSol implementation is between 1.8 - 2.1 times slower than Sundials. It is interesting that when the problem is implemented in the DiffSl language (see benchmark below) the slowdown reduces to 1.5, indicating +that much of the performance difference is likely due to the implementation details of the Rust code for the rhs, jacobian and mass matrix functions. + +The DiffSol implementation of the `robertson_ode` problem ranges between 1.2 - 1.8 times slower than Sundials over the problem range. + +### tr_bdf2 & esdirk34 solvers (SDIRK) + +![Sdirk](./images/benchmarks/bench_tr_bdf2_esdirk.svg) + +The low-order `tr_bdf2` solver is slower than the `bdf` solver for all the problems, perhaps due to the generally tight tolerances used (`robertson` and `robertson_ode` have tolerances of 1e-6-1e-8, `heat2d` was 1e-7 and `foodweb` was 1e-5). The `esdirk34` solver is faster than `bdf` for the `foodweb` problem, but slightly slower for the other problems. -The DiffSol implementation of the `robertson_ode` problem is the only problem generally slower then the Sundials CVODE implementation and is about 1.5 - 1.9 the time taken by Sundials. Since the same method and linear solver is used in both cases the cause of this discrepancy is not -due to these factors, but is likely due to the differences in how the jacobian is calculated (in Sundials it is provided directly, but DiffSol is required to calculate it from the jacobian multiplication function). ### Bdf + DiffSl ![Bdf + DiffSl](./images/benchmarks/bench_bdf_diffsl.svg) The main difference between this plot and the previous for the Bdf solver is the use of the DiffSl language for the equations, rather than Rust closures. -The trends in each case are mostly the same, and the DiffSl implementation only has a small slowdown comparared with rust closures for most problems. -This difference is minimal, and can be seen most clearly for the small `robertson` problem where the DiffSl implementation is just above 1.0 times the speed of the Sundials implementation, while the rust closure implementation is about 0.9. -However, for some larger systems the DiffSl can be faster, for example the `foodweb` problem is quicker at larger sizes, which is probably due to the fact that the rust closures bound-check the array indexing, while the DiffSl implementation does not. +The trends in each case are mostly the same, and the DiffSl implementation only has a small slowdown comparared with rust closures for most problems. For some problems, such as `foodweb`, the DiffSl implementation is actually faster than the Rust closure implementation. +This may be due to the fact that the rust closures bound-check the array indexing, while the DiffSl implementation does not. This plot demonstrates that a DiffSL implementation can be comparable in speed to a hand-written Rust or C implementation, but much more easily wrapped and used from a high-level language like Python or R, where the equations can be passed down to the rust solver as a simple string and then JIT compiled at runtime. \ No newline at end of file