Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Alternative CUDA implementation of seed finding #230

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions core/include/traccc/edm/spacepoint.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ struct spacepoint {
scalar radius() const {
return std::sqrt(global[0] * global[0] + global[1] * global[1]);
}
TRACCC_HOST_DEVICE
scalar phi() const { return std::atan2(y(), x()); }
};

/// Comparison / ordering operator for spacepoints
Expand Down
16 changes: 16 additions & 0 deletions core/include/traccc/seeding/doublet_finding_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ struct doublet_finding_helper {
const internal_spacepoint<spacepoint>& sp2,
const seedfinder_config& config);

static inline TRACCC_HOST_DEVICE bool isCompatible(
bool top, const internal_spacepoint<spacepoint>& sp1,
const internal_spacepoint<spacepoint>& sp2,
const seedfinder_config& config);

/// Do the conformal transformation on doublet's coordinate
///
/// @param sp1 is middle spacepoint
Expand All @@ -48,6 +53,17 @@ struct doublet_finding_helper {
const internal_spacepoint<spacepoint>& sp2);
};

bool doublet_finding_helper::isCompatible(
bool top, const internal_spacepoint<spacepoint>& sp1,
const internal_spacepoint<spacepoint>& sp2,
const seedfinder_config& config) {
if (top) {
return isCompatible<details::spacepoint_type::top>(sp1, sp2, config);
} else {
return isCompatible<details::spacepoint_type::bottom>(sp1, sp2, config);
}
}

template <details::spacepoint_type otherSpType>
bool TRACCC_HOST_DEVICE
doublet_finding_helper::isCompatible(const internal_spacepoint<spacepoint>& sp1,
Expand Down
4 changes: 4 additions & 0 deletions device/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ traccc_add_library( traccc_cuda cuda TYPE SHARED
"src/seeding/spacepoint_binning.cu"
"include/traccc/cuda/seeding/experimental/spacepoint_formation.hpp"
"src/seeding/experimental/spacepoint_formation.cu"
# Alternate seeding.
"src/seeding2/seed_finding2.cu"
"src/seeding2/kernels/seed_finding_kernel.cu"
"src/seeding2/kernels/kd_tree_kernel.cu"
# Clusterization
"include/traccc/cuda/clusterization/clusterization_algorithm.hpp"
"src/clusterization/clusterization_algorithm.cu"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/** TRACCC library, part of the ACTS project (R&D line)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this need to be a public header? Should it not just live in the src/ directory?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In principle yes, I was in two minds about this because generalized accelerator structure construction code may be useful elsewhere, but we can put it in a private header also.

*
* (c) 2022 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/

#pragma once

#include <traccc/cuda/seeding2/types/kd_tree.hpp>
#include <traccc/edm/internal_spacepoint.hpp>
#include <traccc/edm/spacepoint.hpp>
#include <vecmem/utils/copy.hpp>
#include <vector>

namespace traccc::cuda {
/**
* @brief Creates a k-d tree from a given set of spacepoints.
*
* @return A pair containing the k-d tree nodes as well as the number of nodes.
*/
std::tuple<kd_tree_owning_t, uint32_t, vecmem::data::vector_buffer<std::size_t>>
create_kd_tree(vecmem::memory_resource&, vecmem::copy& copy,
const spacepoint_collection_types::const_view&);
} // namespace traccc::cuda
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/** TRACCC library, part of the ACTS project (R&D line)
*
* (c) 2022 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/

#pragma once

#include <traccc/cuda/seeding2/types/kd_tree.hpp>
#include <traccc/edm/internal_spacepoint.hpp>
#include <traccc/edm/seed.hpp>
#include <traccc/edm/spacepoint.hpp>
#include <traccc/seeding/detail/seeding_config.hpp>
#include <vecmem/utils/copy.hpp>

namespace traccc::cuda {
/**
* @brief Execute the seed finding kernel itself.
*
* @return A pair containing the list of internal seeds as well as the number
* of seeds.
*/
seed_collection_types::buffer run_seeding(
seedfinder_config, seedfilter_config, vecmem::memory_resource &,
vecmem::copy &, const spacepoint_collection_types::const_view &, kd_tree_t);
} // namespace traccc::cuda
46 changes: 46 additions & 0 deletions device/cuda/include/traccc/cuda/seeding2/seed_finding.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/** TRACCC library, part of the ACTS project (R&D line)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess it's really just this header that strictly needs to be public, no? 🤔

*
* (c) 2022 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/

#pragma once

#include <traccc/edm/seed.hpp>
#include <traccc/edm/spacepoint.hpp>
#include <traccc/seeding/detail/seeding_config.hpp>
#include <traccc/utils/algorithm.hpp>
#include <traccc/utils/memory_resource.hpp>
#include <vecmem/memory/cuda/device_memory_resource.hpp>
#include <vecmem/utils/copy.hpp>

#include "traccc/cuda/utils/stream.hpp"

namespace traccc::cuda {
/**
* @brief Alternative seed finding algorithm, using orthogonal range search
* implemented through a k-d tree.
*/
class seed_finding2 : public algorithm<seed_collection_types::buffer(
const spacepoint_collection_types::const_view&)> {
public:
seed_finding2(const seedfinder_config& config,
const seedfilter_config& filter_config,
const traccc::memory_resource& mr, vecmem::copy& copy,
stream& str);

output_type operator()(
const spacepoint_collection_types::const_view& sps) const override;

private:
traccc::memory_resource m_mr;
vecmem::copy& m_copy;
stream& m_stream;

seedfinder_config m_seedfinder_config;
seedfilter_config m_seedfilter_config;

int m_warp_size;
};
} // namespace traccc::cuda
36 changes: 36 additions & 0 deletions device/cuda/include/traccc/cuda/seeding2/types/kd_tree.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/** TRACCC library, part of the ACTS project (R&D line)
*
* (c) 2022 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/

#pragma once

#include <traccc/cuda/seeding2/types/range3d.hpp>
#include <traccc/utils/array_wrapper.hpp>
#include <vecmem/memory/unique_ptr.hpp>

namespace traccc::cuda {
enum class nodetype_e { LEAF, INTERNAL, NON_EXTANT };

enum class pivot_e { Phi, R, Z };

template <template <typename> typename F>
struct node_t {
using tuple_t =
std::tuple<nodetype_e, range3d, uint32_t, uint32_t, pivot_e, float>;

F<nodetype_e> type;
F<range3d> range;
F<uint32_t> begin, end;
F<pivot_e> dim;
F<float> mid;
};

using kd_tree = array_wrapper<soa, node_t>;

using kd_tree_owning_t = kd_tree::owner;

using kd_tree_t = kd_tree::handle;
} // namespace traccc::cuda
73 changes: 73 additions & 0 deletions device/cuda/include/traccc/cuda/seeding2/types/range3d.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/** TRACCC library, part of the ACTS project (R&D line)
*
* (c) 2022 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/

#pragma once

#include <array>
#include <cstdint>

namespace traccc::cuda {
/**
* @brief Range in three dimensions, defined by minima and maxima in the phi,
* r, and z dimensions.
*/
struct range3d {
float phi_min, phi_max, r_min, r_max, z_min, z_max;

__host__ __device__ __forceinline__ static range3d Infinite() {
range3d r;

r.phi_min = std::numeric_limits<float>::lowest();
r.phi_max = std::numeric_limits<float>::max();
r.r_min = 0.f;
r.r_max = std::numeric_limits<float>::max();
r.z_min = std::numeric_limits<float>::lowest();
r.z_max = std::numeric_limits<float>::max();

return r;
}

__host__ __device__ __forceinline__ static range3d Degenerate() {
range3d r;

r.phi_min = std::numeric_limits<float>::max();
r.phi_max = std::numeric_limits<float>::lowest();
r.r_min = std::numeric_limits<float>::max();
r.r_max = 0.f;
r.z_min = std::numeric_limits<float>::max();
r.z_max = std::numeric_limits<float>::lowest();

return r;
}

__host__ __device__ __forceinline__ static range3d Union(const range3d& a,
const range3d& b) {
return {std::min(a.phi_min, b.phi_min), std::max(a.phi_max, b.phi_max),
std::min(a.r_min, b.r_min), std::max(a.r_max, b.r_max),
std::min(a.z_min, b.z_min), std::max(a.z_max, b.z_max)};
}

__host__ __device__ __forceinline__ bool intersects(
const range3d& o) const {
return phi_min <= o.phi_max && o.phi_min < phi_max &&
r_min <= o.r_max && o.r_min < r_max && z_min <= o.z_max &&
o.z_min < z_max;
}

__host__ __device__ __forceinline__ bool dominates(const range3d& o) const {
return phi_min <= o.phi_min && o.phi_max <= phi_max &&
r_min <= o.r_min && o.r_max <= r_max && z_min <= o.z_min &&
o.z_max <= z_max;
}

__host__ __device__ __forceinline__ bool contains(float phi, float r,
float z) const {
return phi_min <= phi && phi <= phi_max && r_min <= r && r <= r_max &&
z_min <= z && z <= z_max;
}
};
} // namespace traccc::cuda
120 changes: 120 additions & 0 deletions device/cuda/include/traccc/cuda/utils/sort.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/** TRACCC library, part of the ACTS project (R&D line)
*
* (c) 2021 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/

#pragma once

namespace traccc::cuda {
/**
* @brief Swap two values of arbitrary type.
*
* @tparam T The type of values to swap.
*
* @param a The first object in the swap (will take the value of b).
* @param b The second object in the swap (will take the value of a).
*/
template <typename T>
__device__ __forceinline__ void swap(T& a, T& b) {
T t = a;
a = b;
b = t;
}

/**
* @brief Perform a block-wide odd-even key sorting.
*
* This function performs a sorting operation across the entire block, assuming
* that all the threads in the block are currently active.
*
* @warning The behaviour of this function is ill-defined if any of the threads
* in the block have exited.
*
* @warning This method is efficient for sorting small arrays, preferable in
* shared memory, but given the O(n^2) worst-case performance this should not
* be used on larger arrays.
*
* @tparam K The type of keys to sort.
* @tparam C The type of the comparison function.
*
* @param keys An array of keys to sort.
* @param num_keys The number of keys in the array to sort.
* @param comparison A comparison function.
*/
template <typename K, typename C>
__device__ void blockOddEvenKeySort(K* keys, uint32_t num_keys,
C&& comparison) {
bool sorted;

do {
sorted = true;

for (uint32_t j = 2 * threadIdx.x + 1; j < num_keys - 1;
j += 2 * blockDim.x) {
if (comparison(keys[j + 1], keys[j])) {
swap(keys[j + 1], keys[j]);
sorted = false;
}
}

__syncthreads();

for (uint32_t j = 2 * threadIdx.x; j < num_keys - 1;
j += 2 * blockDim.x) {
if (comparison(keys[j + 1], keys[j])) {
swap(keys[j + 1], keys[j]);
sorted = false;
}
}
} while (__syncthreads_or(!sorted));
}

/**
* @brief Perform a warp-wide odd-even key sorting.
*
* This function performs a sorting operation across a single warp, assuming
* that all the threads in the warp are currently active.
*
* @warning The behaviour of this function is ill-defined if any of the threads
* in the warp have exited.
*
* @warning This method is efficient for sorting small arrays, preferable in
* shared memory, but given the O(n^2) worst-case performance this should not
* be used on larger arrays.
*
* @tparam K The type of keys to sort.
* @tparam C The type of the comparison function.
*
* @param keys An array of keys to sort.
* @param num_keys The number of keys in the array to sort.
* @param comparison A comparison function.
*/
template <typename K, typename C>
__device__ void warpOddEvenKeySort(K* keys, uint32_t num_keys, C&& comparison) {
bool sorted;

do {
sorted = true;

for (uint32_t j = 2 * (threadIdx.x % warpSize) + 1; j < num_keys - 1;
j += 2 * warpSize) {
if (comparison(keys[j + 1], keys[j])) {
swap(keys[j + 1], keys[j]);
sorted = false;
}
}

__syncwarp(0xFFFFFFFF);

for (uint32_t j = 2 * (threadIdx.x % warpSize); j < num_keys - 1;
j += 2 * warpSize) {
if (comparison(keys[j + 1], keys[j])) {
swap(keys[j + 1], keys[j]);
sorted = false;
}
}
} while (__any_sync(0xFFFFFFFF, !sorted));
}
} // namespace traccc::cuda
Loading
Loading