Skip to content

Commit

Permalink
Checkpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
Stephen Nicholas Swatman committed Mar 8, 2023
1 parent 50a02e2 commit 5bc9530
Show file tree
Hide file tree
Showing 6 changed files with 240 additions and 176 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@

#include <traccc/cuda/seeding2/types/internal_sp.hpp>
#include <traccc/cuda/seeding2/types/kd_tree.hpp>
#include <traccc/edm/alt_seed.hpp>
#include <traccc/edm/internal_spacepoint.hpp>
#include <traccc/edm/spacepoint.hpp>
#include <traccc/edm/alt_seed.hpp>
#include <traccc/seeding/detail/seeding_config.hpp>

namespace traccc::cuda {
Expand Down
3 changes: 2 additions & 1 deletion device/cuda/include/traccc/cuda/utils/device_traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
#define MAX_RESIDENT_THREADS_PER_SM 1536
#elif __CUDA_ARCH__ <= 900
#define MAX_RESIDENT_THREADS_PER_SM 2048
#warning "Unknown CUDA architecture, setting maximum resident threads per block to 1024."
#warning \
"Unknown CUDA architecture, setting maximum resident threads per block to 1024."
#define MAX_RESIDENT_THREADS_PER_SM 1024
#endif

Expand Down
72 changes: 72 additions & 0 deletions device/cuda/include/traccc/cuda/utils/sort.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/** TRACCC library, part of the ACTS project (R&D line)
*
* (c) 2021 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/

#pragma once

namespace traccc::cuda {
/**
 * @brief Exchange the values held by two objects (device-side helper).
 *
 * The exchange is done with one copy-construction and two copy-assignments,
 * so `T` must be copyable in device code.
 *
 * @tparam T Type of the objects being exchanged.
 *
 * @param[in,out] a On return, holds the value `b` had on entry.
 * @param[in,out] b On return, holds the value `a` had on entry.
 */
template <typename T>
__device__ void swap(T& a, T& b) {
    // Park one of the two values in a temporary so it survives the overwrite.
    T stash = b;
    b = a;
    a = stash;
}

/**
 * @brief Sort an array of keys in place with an odd-even transposition sort
 * executed cooperatively by the threads of one block.
 *
 * Each pass consists of an "odd" phase (pairs starting at index 1, 3, 5, ...)
 * and an "even" phase (pairs starting at index 0, 2, 4, ...). Within a phase
 * every thread handles the pairs starting at `2 * threadIdx.x (+ 1)` and then
 * strides by `2 * blockDim.x`, so no two threads touch the same pair. Passes
 * are repeated until no thread in the block performed a swap.
 *
 * @note This function contains block-wide barriers (`__syncthreads` and
 * `__syncthreads_or`), so it must be reached by every thread of the block,
 * with the same `keys` and `num_keys`.
 *
 * @note Only `threadIdx.x` / `blockDim.x` are used for work distribution.
 *
 * @tparam K Type of the keys; must be copyable in device code.
 * @tparam C Type of the comparison callable.
 *
 * @param[in,out] keys Pointer to the array to sort. It must be accessible to
 * all threads of the block (NOTE(review): presumably shared or global memory;
 * confirm against callers).
 * @param[in] num_keys Number of elements in `keys`. Arrays with zero or one
 * element are left untouched.
 * @param[in] comparison Callable invoked as `comparison(x, y)`; a `true`
 * result for `x = keys[j + 1]`, `y = keys[j]` causes the pair to be swapped,
 * i.e. it means "x must be placed before y". It must return `false` for
 * equivalent keys (a strict ordering), otherwise equal neighbours are swapped
 * on every pass and the loop never terminates.
 */
template <typename K, typename C>
__device__ void blockOddEvenKeySort(K* keys, uint32_t num_keys,
                                    C&& comparison) {
    bool sorted;

    do {
        sorted = true;

        // Odd phase. The bound is written as `j + 1 < num_keys` rather than
        // `j < num_keys - 1`: the latter wraps around for `num_keys == 0`
        // (unsigned arithmetic) and would walk far out of bounds.
        for (uint32_t j = 2 * threadIdx.x + 1; j + 1 < num_keys;
             j += 2 * blockDim.x) {
            if (comparison(keys[j + 1], keys[j])) {
                swap(keys[j + 1], keys[j]);
                sorted = false;
            }
        }

        // The even phase reads elements written by other threads in the odd
        // phase, so all odd-phase swaps must be complete and visible first.
        __syncthreads();

        // Even phase, same overflow-safe bound as above.
        for (uint32_t j = 2 * threadIdx.x; j + 1 < num_keys;
             j += 2 * blockDim.x) {
            if (comparison(keys[j + 1], keys[j])) {
                swap(keys[j + 1], keys[j]);
                sorted = false;
            }
        }

        // `__syncthreads_or` is both the barrier separating this even phase
        // from the next odd phase and the block-wide vote on whether any
        // thread still had to swap.
    } while (__syncthreads_or(!sorted));
}

/**
 * @brief Sort an array of keys in place with an odd-even transposition sort
 * executed cooperatively by the lanes of one warp.
 *
 * Each pass consists of an "odd" phase (pairs starting at index 1, 3, 5, ...)
 * and an "even" phase (pairs starting at index 0, 2, 4, ...). Within a phase
 * every lane handles the pairs starting at `2 * lane (+ 1)` and then strides
 * by `2 * WARP_SIZE`, where `lane = threadIdx.x % WARP_SIZE`. Passes are
 * repeated until no participating lane performed a swap.
 *
 * @note `WARP_SIZE` is not defined in this header; it must be provided by the
 * including translation unit (NOTE(review): presumably 32; confirm).
 *
 * @note The set of participating lanes is sampled once, on entry, with
 * `__activemask()`. Callers should therefore enter this function with all
 * lanes that are meant to cooperate converged, and with the same `keys` and
 * `num_keys` in each of them. Pairs assigned to lanes that do not participate
 * are never processed, so for a complete sort all `WARP_SIZE` lanes need to
 * call this function.
 *
 * @tparam K Type of the keys; must be copyable in device code.
 * @tparam C Type of the comparison callable.
 *
 * @param[in,out] keys Pointer to the array to sort. It must be accessible to
 * all participating lanes (NOTE(review): presumably shared or global memory;
 * confirm against callers).
 * @param[in] num_keys Number of elements in `keys`. Arrays with zero or one
 * element are left untouched.
 * @param[in] comparison Callable invoked as `comparison(x, y)`; a `true`
 * result for `x = keys[j + 1]`, `y = keys[j]` causes the pair to be swapped,
 * i.e. it means "x must be placed before y". It must return `false` for
 * equivalent keys (a strict ordering), otherwise equal neighbours are swapped
 * on every pass and the loop never terminates.
 */
template <typename K, typename C>
__device__ void warpOddEvenKeySort(K* keys, uint32_t num_keys, C&& comparison) {
    // Capture the participant mask once. Re-querying `__activemask()` at each
    // synchronisation point (after the lanes may have diverged in the `for`
    // loops below, which have lane-dependent trip counts) could yield a
    // different mask in different lanes.
    const unsigned int mask = __activemask();
    const uint32_t lane = threadIdx.x % WARP_SIZE;

    bool sorted;

    do {
        sorted = true;

        // Odd phase. The bound is written as `j + 1 < num_keys` rather than
        // `j < num_keys - 1`: the latter wraps around for `num_keys == 0`
        // (unsigned arithmetic) and would walk far out of bounds.
        for (uint32_t j = 2 * lane + 1; j + 1 < num_keys;
             j += 2 * WARP_SIZE) {
            if (comparison(keys[j + 1], keys[j])) {
                swap(keys[j + 1], keys[j]);
                sorted = false;
            }
        }

        // The even phase reads elements written by other lanes in the odd
        // phase; `__syncwarp` orders those memory accesses.
        __syncwarp(mask);

        // Even phase, same overflow-safe bound as above.
        for (uint32_t j = 2 * lane; j + 1 < num_keys; j += 2 * WARP_SIZE) {
            if (comparison(keys[j + 1], keys[j])) {
                swap(keys[j + 1], keys[j]);
                sorted = false;
            }
        }

        // The vote below synchronises the lanes but does not act as a memory
        // barrier, so an explicit `__syncwarp` is needed to make the
        // even-phase swaps visible before the next odd phase reads them.
        __syncwarp(mask);
    } while (__any_sync(mask, !sorted));
}
} // namespace traccc::cuda
Loading

0 comments on commit 5bc9530

Please sign in to comment.