diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d7a38e1..0285c44 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -95,7 +95,11 @@ jobs:
env:
USER_NAME: matekelemen
- name: Generate Docs
- run: cd docs && doxygen doxyfile && cd ..
+ run: |
+ cd docs
+ python3 prepareDocs.py
+ doxygen doxyfile
+ cd ..
- name: Generate root index.html
run: echo "" > index.html
- name: Update Remote
diff --git a/docs/doxyfile b/docs/doxyfile
index 666811d..b7ff6d3 100644
--- a/docs/doxyfile
+++ b/docs/doxyfile
@@ -19,7 +19,7 @@ SHOW_INCLUDE_FILES = NO
ENABLE_PREPROCESSING = YES
MACRO_EXPANSION = NO
-INPUT = ../readme.md \
+INPUT = readme.md \
../include
FILE_PATTERNS = *.hpp
RECURSIVE = YES
@@ -30,7 +30,7 @@ EXCLUDE_PATTERNS = */impl/* \
*_test*
EXCLUDE_SYMBOLS = detail::* \
*::detail*
-USE_MDFILE_AS_MAINPAGE = ../readme.md
+USE_MDFILE_AS_MAINPAGE = readme.md
IMAGE_PATH =
INPUT_FILTER =
FILTER_PATTERNS =
diff --git a/docs/readme.md b/docs/readme.md
new file mode 100644
index 0000000..5499db0
--- /dev/null
+++ b/docs/readme.md
@@ -0,0 +1,113 @@
+# MCGS - Multicolor Gauss-Seidel Smoother
+
+MCGS is a lightweight library for performing parallelized Gauss-Seidel smoothing, focusing on sparse systems with imbalanced topologies. Implementations for related tasks such as graph coloring and reordering are included as well.
+
+
+
+
+
+
+
+## Features
+
+- Gauss-Seidel iterations in parallel (OpenMP) or serial
+- SOR (successive over-relaxation) in parallel (OpenMP) or serial
+- Graph coloring in parallel (loose implementation of `doi:10.1006/jagm.2000.1097`)
+- Matrix/vector reordering and reverse reordering
+
+## Usage
+
+Typical workflow
+
+- construct your linear system $A x = b$
+- construct an adaptor for $A$ (see [mcgs::CSRAdaptor](../../docs/html/structmcgs_1_1CSRAdaptor.html))
+- compute a coloring of $A$ (see [mcgs::color](../../docs/html/namespacemcgs.html#ad660f970843b8c8edea18c6e9291f6e5))
+- construct a partition of $A$ with respect to the coloring (see [mcgs::makePartition](../../docs/html/namespacemcgs.html#adbeb4189f3eadcb713e803cf94aa38cf))
+- reorder the system with respect to the coloring (see [mcgs::reorder](../../docs/html/namespacemcgs.html#a036dc0fa0b11d8adb71427d78ac7b7ad))
+- perform Gauss-Seidel iterations **using the reordered partition** (see [mcgs::solve](../../docs/html/namespacemcgs.html#ae862fac411e001950f012872f6ac7e0c))
+- *optional: restore the original order of your system* (see [mcgs::revertReorder](../../docs/html/namespacemcgs.html#aa1175b9934a8890204c6be445ea4407e))
+- deallocate partitions (see [mcgs::destroyPartition](../../docs/html/namespacemcgs.html#ad619ded9f67d8a9f379ad7e4b759d854))
+
+
+### C++ Example Snippet
+
+```cpp
+#include "mcgs/mcgs.hpp"
+
+...
+
+// Any CSR matrix will do but for the sake of familiarity, let's assume you're using Eigen.
+Eigen::SparseMatrix<double,Eigen::RowMajor,int> A; // <== left hand side matrix
+Eigen::Matrix<double,Eigen::Dynamic,1> b; // <== right hand side vector
+
+// Construct an adaptor for your matrix.
+mcgs::CSRAdaptor</*index type=*/ int, /*value type=*/double> adaptor;
+adaptor.rowCount = A.rows();
+adaptor.columnCount = A.cols();
+adaptor.entryCount = A.nonZeros();
+adaptor.pRowExtents = A.outerIndexPtr();
+adaptor.pColumnIndices = A.innerIndexPtr();
+adaptor.pEntries = A.innerNonZeroPtr();
+
+// Color the rows of your matrix.
+std::vector<unsigned> colors(adaptor.rowCount);
+mcgs::color(colors.data(),
+ adaptor,
+ mcgs::ColorSettings {});
+
+// Construct a partition for your matrix with respect to the coloring,
+// and reorder the system accordingly. Note that this mutates your original matrix!
+auto pPartition = mcgs::makePartition(colors.data(), adaptor.rowCount);
+auto pReorderedPartition = mcgs::reorder(A.rows(), A.cols(), A.nonZeros(),
+ A.outerIndexPtr(), A.innerIndexPtr(), A.innerNonZeroPtr(),
+ b.data());
+
+// Do 10 Gauss-Seidel iterations.
+std::vector<double> x(adaptor.columnCount);
+mcgs::SolveSettings</*index type=*/int, /*value type=*/double> settings;
+settings.maxIterations = 10;
+settings.parallelization = mcgs::Parallelization::RowWise; // <== default parallelization strategy, check out the other ones as well.
+mcgs::solve(x.data(), adaptor, b.data(), pReorderedPartition, settings);
+
+// Optional: if you need to recover your original system,
+// you need to undo the reordering.
+// See mcgs::revertReorder
+
+// Cleanup
+mcgs::destroyPartition(pPartition);
+mcgs::destroyPartition(pReorderedPartition);
+```
+
+## Requirements
+
+- C++ compiler with full C++17 support (GCC or Clang are tested)
+- CMake version 3.15 or later
+- [optional] OpenMP 2.0 or later for shared memory parallelization
+
+## Installation
+
+MCGS is written in C++ and uses CMake as a build system. Building produces a single shared library and matching header.
+
+### Build and Install
+ ```bash
+ cmake <path-to-mcgs-source> \
+ -B <build-directory> \
+ -DCMAKE_INSTALL_PREFIX=<install-directory> \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DMCGS_SHARED_MEMORY_PARALLELISM=OpenMP
+
+ cmake --build <build-directory> --target install
+ ```
+
+### Build options
+
+- CMake options for `MCGS_SHARED_MEMORY_PARALLELISM`
+ - `None`: no parallelization
+ - `OpenMP`: use OpenMP for shared memory parallelization
+
+### Include MCGS in a CMake project
+
+```cmake
+find_package(MCGS REQUIRED)
+target_link_libraries(<your-target> PRIVATE mcgs)
+```
diff --git a/src/solve.cpp b/src/solve.cpp
index 8b44f30..3c228e4 100644
--- a/src/solve.cpp
+++ b/src/solve.cpp
@@ -363,13 +363,13 @@ int solve(TValue* pSolution,
const auto threadCount = threadCounts[iPartition];
if (pPartition->isContiguous()) {
if (dispatchSweep(pSolution,
- buffer.data(),
- rMatrix,
- pRHS,
- settings,
- *pPartition->begin(iPartition),
- *pPartition->end(iPartition),
- threadCount) != MCGS_SUCCESS) {
+ buffer.data(),
+ rMatrix,
+ pRHS,
+ settings,
+ *pPartition->begin(iPartition),
+ *pPartition->end(iPartition),
+ threadCount) != MCGS_SUCCESS) {
if (1 <= settings.verbosity) {
std::cerr << "mcgs: error: parallel Gauss-Seidel failed at iteration "
<< iIteration