Skip to content

Commit

Permalink
format readme and all cpp hpp files
Browse files Browse the repository at this point in the history
Committer: Weile Wei <[email protected]>
  • Loading branch information
weilewei committed Mar 30, 2023
1 parent 086dfe7 commit c385e2e
Show file tree
Hide file tree
Showing 8 changed files with 128 additions and 131 deletions.
29 changes: 21 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# C++ Parallel Algorithms Benchmark

This is a brief evaluation of parallel algorithms in C++. The main focus is on parallel
transform and sort algorithms, which are available in several parallel frameworks such as
Intel HPX, Kokkos, TBB, gnu, and nvhpc. To conduct the benchmark, a vector of random numbers is
first allocated and then subjected to a range of parallel algorithms. The primary objective of
the benchmark is to gain valuable insights into the performance of different parallel algorithms
and frameworks.
transform and sort algorithms, which are available in several parallel frameworks
such as HPX, Kokkos, Intel TBB, GNU, and NVHPC. To conduct the benchmark, a
vector of random numbers is first allocated and then subjected to a range of parallel
algorithms. The primary objective of the benchmark is to gain valuable insights
into the performance of different parallel algorithms and frameworks.

## Example code

Expand Down Expand Up @@ -50,6 +50,16 @@ __gnu_parallel::transform(workVec.begin(), workVec.end(),
workVec.begin(), [](double arg){ return std::tan(arg); });
```

* Taskflow parallel transform

```cpp
tf::Executor executor(num_threads);

tf::Taskflow t1;
t1.for_each(workVec.begin(), workVec.end(), [] (double& arg) {
arg = std::tan(arg);});
```
## How to Build
Example build script:
Expand Down Expand Up @@ -88,7 +98,8 @@ do
for NUM_THREADS in 1 2 4 8 16 32 64 128
do
echo "running nvcPar_cpu with $SIZE workload and $NUM_THREADS threads"
OMP_NUM_THREADS=$NUM_THREADS OMP_PROC_BIND=spread OMP_PLACES=threads ./nvcPar_cpu $SIZE
OMP_NUM_THREADS=$NUM_THREADS OMP_PROC_BIND=spread OMP_PLACES=threads \
./nvcPar_cpu $SIZE

echo "running hpxPar_gcc with $SIZE workload and $NUM_THREADS"
./hpxPar_gcc $SIZE --hpx:threads=$NUM_THREADS
Expand All @@ -97,10 +108,12 @@ do
./hpxPar_clang $SIZE --hpx:threads=$NUM_THREADS

echo "running gnuPar_gcc with $SIZE workload and $NUM_THREADS"
OMP_NUM_THREADS=$NUM_THREADS OMP_PROC_BIND=spread OMP_PLACES=threads ./gnuPar_gcc $SIZE
OMP_NUM_THREADS=$NUM_THREADS OMP_PROC_BIND=spread OMP_PLACES=threads \
./gnuPar_gcc $SIZE

echo "running gnuPar_clang with $SIZE workload and $NUM_THREADS"
OMP_NUM_THREADS=$NUM_THREADS OMP_PROC_BIND=spread OMP_PLACES=threads ./gnuPar_clang $SIZE
OMP_NUM_THREADS=$NUM_THREADS OMP_PROC_BIND=spread OMP_PLACES=threads \
./gnuPar_clang $SIZE

echo "running kokkosPar_openmp_gcc with $SIZE workload and $NUM_THREADS"
OMP_PROC_BIND=spread OMP_PLACES=threads ./kokkosPar_openmp_gcc $SIZE --kokkos-num-threads=$NUM_THREADS
Expand Down
25 changes: 14 additions & 11 deletions commons/commons.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include <random>
#include <chrono>
#include <iostream>
#include <random>

#include <algorithm>
#include <cmath>
Expand All @@ -9,25 +9,28 @@
#include <string>
#include <vector>

std::random_device rd; // only used once to initialise (seed) engine
std::mt19937 rng(rd()); // random-number engine used (Mersenne-Twister in this case)
std::uniform_int_distribution<int> uni(0,65535); // guaranteed unbiased
std::random_device rd; // only used once to initialise (seed) engine
std::mt19937
rng(rd()); // random-number engine used (Mersenne-Twister in this case)
std::uniform_int_distribution<int> uni(0, 65535); // guaranteed unbiased

// default repetition count is 5
constexpr long long REPEAT = 5;

template <typename Func>
void getExecutionTime(const std::string& title, Func func){
void getExecutionTime(const std::string &title, Func func) {

std::chrono::duration<double> totalDur =
std::chrono::duration<double>::zero();

std::chrono::duration<double> totalDur = std::chrono::duration<double>::zero();

for(size_t i = 0; i < REPEAT; i++) {
for (size_t i = 0; i < REPEAT; i++) {
const auto sta = std::chrono::steady_clock::now();
func();
const std::chrono::duration<double> dur = std::chrono::steady_clock::now() - sta;
const std::chrono::duration<double> dur =
std::chrono::steady_clock::now() - sta;
totalDur += dur;
}

std::cout << title << ": " << totalDur.count() / REPEAT <<
" sec. averaged over " << REPEAT << " runs." << std::endl;
std::cout << title << ": " << totalDur.count() / REPEAT
<< " sec. averaged over " << REPEAT << " runs." << std::endl;
}
23 changes: 11 additions & 12 deletions gnu/gnuPar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,30 @@

void runTaskSize(const long long length) {
std::vector<double> workVec(length);
for (size_t i =0; i < length; ++i) {
for (size_t i = 0; i < length; ++i) {
workVec[i] = uni(rng);
}

getExecutionTime("__gnu_parallel::transform", [workVec]() mutable {
__gnu_parallel::transform(workVec.begin(), workVec.end(),
workVec.begin(),
[](double arg){ return std::tan(arg); }
);
});

__gnu_parallel::transform(workVec.begin(), workVec.end(), workVec.begin(),
[](double arg) { return std::tan(arg); });
});

getExecutionTime("__gnu_parallel::sort", [workVec]() mutable {
__gnu_parallel::sort(workVec.begin(), workVec.end());});
__gnu_parallel::sort(workVec.begin(), workVec.end());
});

std::cout << '\n';
}


int main(int argc, char** argv){
int main(int argc, char **argv) {
long long length;

char *a = argv[1];
length = atoll(a);

std::cout << "running experiment with size of " + std::to_string(length) << "\n";
std::cout << "running experiment with size of " + std::to_string(length)
<< "\n";

runTaskSize(length);
}
43 changes: 19 additions & 24 deletions hpx/hpxPar.cpp
Original file line number Diff line number Diff line change
@@ -1,59 +1,54 @@
#include <commons.hpp>

#include <hpx/hpx_main.hpp>
#include <hpx/iostream.hpp>
#include <hpx/algorithm.hpp>
#include <hpx/execution.hpp>

void runTaskSize(const long long length){
#include <hpx/hpx_main.hpp>
#include <hpx/iostream.hpp>

void runTaskSize(const long long length) {
std::vector<double> workVec(length);
for (size_t i =0; i < length; ++i) {
for (size_t i = 0; i < length; ++i) {
workVec[i] = uni(rng);
}

getExecutionTime("transform hpx::execution::seq", [workVec]() mutable {
hpx::transform(hpx::execution::seq, workVec.begin(), workVec.end(),
workVec.begin(),
[](double arg){ return std::tan(arg); }
);
});

workVec.begin(), [](double arg) { return std::tan(arg); });
});

getExecutionTime("transform hpx::execution::par", [workVec]() mutable {
hpx::transform(hpx::execution::par, workVec.begin(), workVec.end(),
workVec.begin(),
[](double arg){ return std::tan(arg); }
);
workVec.begin(), [](double arg) { return std::tan(arg); });
});

getExecutionTime("transform hpx::execution::par_unseq", [workVec]() mutable {
hpx::transform(hpx::execution::par_unseq, workVec.begin(), workVec.end(),
workVec.begin(),
[](double arg){ return std::tan(arg); }
);
workVec.begin(), [](double arg) { return std::tan(arg); });
});

getExecutionTime("sort hpx::execution::seq", [workVec]() mutable {
hpx::sort(hpx::execution::seq, workVec.begin(), workVec.end());
});
getExecutionTime("sort hpx ::execution::par", [workVec]() mutable {

getExecutionTime("sort hpx ::execution::par", [workVec]() mutable {
hpx::sort(hpx::execution::par, workVec.begin(), workVec.end());
});

getExecutionTime("sort hpx::execution::par_unseq", [workVec]() mutable {
hpx::sort(hpx::execution::par_unseq, workVec.begin(), workVec.end());
hpx::sort(hpx::execution::par_unseq, workVec.begin(), workVec.end());
});

std::cout << '\n';
}

int main(int argc, char** argv){
int main(int argc, char **argv) {
long long length;

char *a = argv[1];
length = atoll(a);

std::cout << "running experiment with size of " + std::to_string(length) << "\n";
std::cout << "running experiment with size of " + std::to_string(length)
<< "\n";

runTaskSize(length);
}
30 changes: 15 additions & 15 deletions kokkos/kokkosPar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,46 +4,46 @@
#include <Kokkos_Sort.hpp>

void runTaskSize(const long long length) {
Kokkos::View<double*> workVec ("x", length);
Kokkos::View<double *> workVec("x", length);

// Prevent touching workVec in serial so we don't mess up data affinity
std::vector<double> workVecData(length);
for (long long i = 0 ; i < length; ++i)
for (long long i = 0; i < length; ++i)
workVecData[i] = uni(rng);

Kokkos::parallel_for(length, KOKKOS_LAMBDA(const int& i)
{ workVec(i) = workVecData[i]; });
Kokkos::parallel_for(
length, KOKKOS_LAMBDA(const int &i) { workVec(i) = workVecData[i]; });

auto test = [=]() {
Kokkos::parallel_for("kokkos::parallel_for transform optimized version",
Kokkos::RangePolicy<Kokkos::IndexType<int>, Kokkos::Schedule<Kokkos::Dynamic>>
(0, length), KOKKOS_LAMBDA (const int& i) {
workVec(i) = std::tan(workVec(i));
});
Kokkos::parallel_for(
"kokkos::parallel_for transform optimized version",
Kokkos::RangePolicy<Kokkos::IndexType<int>,
Kokkos::Schedule<Kokkos::Dynamic>>(0, length),
KOKKOS_LAMBDA(const int &i) { workVec(i) = std::tan(workVec(i)); });
};

test();
getExecutionTime("kokkos::parallel_for transform optimized version", test);

// Kokkos sort will take much longer time to complete, so we exclude it in the benchmark
// getExecutionTime("sort kokkos", [workVec]() mutable {
// Kokkos::sort(workVec);
// Kokkos sort takes much longer to complete, so it is excluded from the
// benchmark:
// getExecutionTime("sort kokkos", [workVec]() mutable {
//   Kokkos::sort(workVec);
// });
}

int main(int argc, char* argv[]) {
int main(int argc, char *argv[]) {
Kokkos::initialize(argc, argv);

long long length;

char *a = argv[1];
length = atoll(a);

std::cout << "running experiment with size of " + std::to_string(length) << "\n";
std::cout << "running experiment with size of " + std::to_string(length)
<< "\n";

runTaskSize(length);

Kokkos::finalize();
std::cout << "\n";

}
43 changes: 19 additions & 24 deletions nvc/nvcPar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,55 +4,50 @@
void runTaskSize(const long long length) {

std::vector<double> workVec(length);
for (size_t i =0; i < length; ++i) {
for (size_t i = 0; i < length; ++i) {
workVec[i] = uni(rng);
}

getExecutionTime("nvc++ transform std::execution::seq", [workVec]() mutable {
std::transform(std::execution::seq, workVec.begin(), workVec.end(),
workVec.begin(),
[](double arg){ return std::tan(arg); }
);
});

workVec.begin(), [](double arg) { return std::tan(arg); });
});

getExecutionTime("nvc++ transform std::execution::par", [workVec]() mutable {
std::transform(std::execution::par, workVec.begin(), workVec.end(),
workVec.begin(),
[](double arg){ return std::tan(arg); }
);
});

getExecutionTime("nvc++ transform std::execution::par_unseq", [workVec]() mutable {
std::transform(std::execution::par_unseq, workVec.begin(), workVec.end(),
workVec.begin(),
[](double arg){ return std::tan(arg); }
);
workVec.begin(), [](double arg) { return std::tan(arg); });
});

getExecutionTime("nvc++ transform std::execution::par_unseq",
[workVec]() mutable {
std::transform(std::execution::par_unseq, workVec.begin(),
workVec.end(), workVec.begin(),
[](double arg) { return std::tan(arg); });
});

getExecutionTime("nvc++ sort std::execution::seq", [workVec]() mutable {
std::sort(std::execution::seq, workVec.begin(), workVec.end());
});
getExecutionTime("nvc++ sort std::execution::par", [workVec]() mutable {
std::sort(std::execution::par, workVec.begin(), workVec.end());

getExecutionTime("nvc++ sort std::execution::par", [workVec]() mutable {
std::sort(std::execution::par, workVec.begin(), workVec.end());
});

getExecutionTime("nvc++ sort std::execution::par_unseq", [workVec]() mutable {
std::sort(std::execution::par_unseq, workVec.begin(), workVec.end());
std::sort(std::execution::par_unseq, workVec.begin(), workVec.end());
});

std::cout << '\n';
}


int main(int argc, char** argv){
int main(int argc, char **argv) {
long long length;

char *a = argv[1];
length = atoll(a);

std::cout << "running experiment with size of " + std::to_string(length) << "\n";
std::cout << "running experiment with size of " + std::to_string(length)
<< "\n";

runTaskSize(length);
}
Loading

0 comments on commit c385e2e

Please sign in to comment.