Skip to content

Commit

Permalink
UpdatE
Browse files Browse the repository at this point in the history
  • Loading branch information
stephenswat committed Jun 25, 2024
1 parent 82de631 commit c79b097
Show file tree
Hide file tree
Showing 9 changed files with 79 additions and 177 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

#pragma once

#include <traccc/cuda/seeding2/types/internal_sp.hpp>
#include <traccc/cuda/seeding2/types/kd_tree.hpp>
#include <traccc/edm/internal_spacepoint.hpp>
#include <traccc/edm/spacepoint.hpp>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,8 @@

#pragma once

#include <traccc/cuda/seeding2/types/internal_sp.hpp>
#include <traccc/cuda/seeding2/types/kd_tree.hpp>
#include <traccc/edm/alt_seed.hpp>
#include <traccc/edm/seed.hpp>
#include <traccc/edm/internal_spacepoint.hpp>
#include <traccc/edm/spacepoint.hpp>
#include <traccc/seeding/detail/seeding_config.hpp>
Expand All @@ -21,7 +20,7 @@ namespace traccc::cuda {
* @return A pair containing the list of internal seeds as well as the number
* of seeds.
*/
std::pair<vecmem::unique_alloc_ptr<alt_seed[]>, uint32_t> run_seeding(
std::pair<vecmem::unique_alloc_ptr<seed[]>, uint32_t> run_seeding(
seedfinder_config, seedfilter_config, vecmem::memory_resource&,
internal_sp_t, kd_tree_t);
const spacepoint_collection_types::const_view&, kd_tree_t);
} // namespace traccc::cuda

This file was deleted.

22 changes: 16 additions & 6 deletions device/cuda/include/traccc/cuda/seeding2/seed_finding.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,29 +7,39 @@

#pragma once

#include <traccc/edm/alt_seed.hpp>
#include <traccc/edm/seed.hpp>
#include <traccc/edm/spacepoint.hpp>
#include <traccc/seeding/detail/seeding_config.hpp>
#include <traccc/utils/algorithm.hpp>
#include <traccc/utils/memory_resource.hpp>
#include <vecmem/memory/cuda/device_memory_resource.hpp>
#include "traccc/cuda/utils/stream.hpp"
#include <vecmem/utils/copy.hpp>

namespace traccc::cuda {
/**
* @brief Alternative seed finding algorithm, using orthogonal range search
* implemented through a k-d tree.
*/
class seed_finding2 : public algorithm<alt_seed_collection_types::buffer(
class seed_finding2 : public algorithm<seed_collection_types::buffer(
const spacepoint_collection_types::const_view&)> {
public:
seed_finding2(const traccc::memory_resource& mr);
seed_finding2(const seedfinder_config& config,
const seedfilter_config& filter_config,
const traccc::memory_resource& mr,
vecmem::copy& copy, stream& str);

output_type operator()(
const spacepoint_collection_types::const_view& sps) const override;

private:
traccc::memory_resource m_output_mr;
seedfinder_config m_finder_conf;
seedfilter_config m_filter_conf;
traccc::memory_resource m_mr;
vecmem::copy& m_copy;
stream& m_stream;

seedfinder_config m_seedfinder_config;
seedfilter_config m_seedfilter_config;

int m_warp_size;
};
} // namespace traccc::cuda
35 changes: 0 additions & 35 deletions device/cuda/include/traccc/cuda/seeding2/types/internal_sp.hpp

This file was deleted.

30 changes: 15 additions & 15 deletions device/cuda/src/seeding2/kernels/kd_tree_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@

#include <iostream>
#include <limits>
#include "../../utils/cuda_error_handling.hpp"
#include <memory>
#include <traccc/cuda/seeding2/kernels/seed_finding_kernel.hpp>
#include <traccc/cuda/seeding2/seed_finding.hpp>
#include <traccc/cuda/seeding2/types/kd_tree.hpp>
#include <traccc/cuda/seeding2/types/range3d.hpp>
#include <traccc/cuda/utils/definitions.hpp>
#include <traccc/cuda/utils/device_traits.hpp>
#include <traccc/cuda/utils/sort.hpp>
#include <traccc/edm/seed.hpp>
Expand Down Expand Up @@ -691,7 +691,7 @@ std::tuple<kd_tree_owning_t, uint32_t, internal_sp_owning_t> create_kd_tree(
vecmem::unique_alloc_ptr<uint32_t> extra_indices =
vecmem::make_unique_alloc<uint32_t>(mr);

CUDA_ERROR_CHECK(cudaMemset(extra_indices.get(), 0, sizeof(uint32_t)));
TRACCC_CUDA_ERROR_CHECK(cudaMemset(extra_indices.get(), 0, sizeof(uint32_t)));

/*
* Launch the index initialization kernel, which turns n spacepoints into
Expand All @@ -703,15 +703,15 @@ std::tuple<kd_tree_owning_t, uint32_t, internal_sp_owning_t> create_kd_tree(
threads_per_block1>>>(spacepoints, indices.get(),
n_sp, extra_indices.get());

CUDA_ERROR_CHECK(cudaGetLastError());
CUDA_ERROR_CHECK(cudaDeviceSynchronize());
TRACCC_CUDA_ERROR_CHECK(cudaGetLastError());
TRACCC_CUDA_ERROR_CHECK(cudaDeviceSynchronize());

/*
* Retrieve the number of halo points from the device.
*/
uint32_t extra_indices_h;

CUDA_ERROR_CHECK(cudaMemcpy(&extra_indices_h, extra_indices.get(),
TRACCC_CUDA_ERROR_CHECK(cudaMemcpy(&extra_indices_h, extra_indices.get(),
sizeof(uint32_t), cudaMemcpyDeviceToHost));

/*
Expand Down Expand Up @@ -743,8 +743,8 @@ std::tuple<kd_tree_owning_t, uint32_t, internal_sp_owning_t> create_kd_tree(
vecmem::unique_alloc_ptr<uint32_t> work_list_size_2 =
vecmem::make_unique_alloc<uint32_t>(mr);

CUDA_ERROR_CHECK(cudaMemset(work_list_size_1.get(), 0, sizeof(uint32_t)));
CUDA_ERROR_CHECK(cudaMemset(work_list_size_2.get(), 0, sizeof(uint32_t)));
TRACCC_CUDA_ERROR_CHECK(cudaMemset(work_list_size_1.get(), 0, sizeof(uint32_t)));
TRACCC_CUDA_ERROR_CHECK(cudaMemset(work_list_size_2.get(), 0, sizeof(uint32_t)));

/*
* Run the initialization kernel for the tree itself, which sets the
Expand All @@ -757,8 +757,8 @@ std::tuple<kd_tree_owning_t, uint32_t, internal_sp_owning_t> create_kd_tree(
num_indices, work_list_1.get(),
work_list_size_1.get());

CUDA_ERROR_CHECK(cudaGetLastError());
CUDA_ERROR_CHECK(cudaDeviceSynchronize());
TRACCC_CUDA_ERROR_CHECK(cudaGetLastError());
TRACCC_CUDA_ERROR_CHECK(cudaDeviceSynchronize());

/*
* Allocate some temporary space for the index buffer, one half for "lower"
Expand Down Expand Up @@ -801,19 +801,19 @@ std::tuple<kd_tree_owning_t, uint32_t, internal_sp_owning_t> create_kd_tree(
work_list_size_2.get());
}

CUDA_ERROR_CHECK(cudaGetLastError());
CUDA_ERROR_CHECK(cudaDeviceSynchronize());
TRACCC_CUDA_ERROR_CHECK(cudaGetLastError());
TRACCC_CUDA_ERROR_CHECK(cudaDeviceSynchronize());

iteration++;

CUDA_ERROR_CHECK(cudaMemcpy(&remaining, work_list_size_2.get(),
TRACCC_CUDA_ERROR_CHECK(cudaMemcpy(&remaining, work_list_size_2.get(),
sizeof(uint32_t), cudaMemcpyDeviceToHost));

std::swap(work_list_1, work_list_2);
std::swap(work_list_size_1, work_list_size_2);
std::swap(indices, index_buffer);

CUDA_ERROR_CHECK(
TRACCC_CUDA_ERROR_CHECK(
cudaMemset(work_list_size_2.get(), 0, sizeof(uint32_t)));
}

Expand All @@ -824,8 +824,8 @@ std::tuple<kd_tree_owning_t, uint32_t, internal_sp_owning_t> create_kd_tree(
num_indices, internal_sp_t(spacepoints), internal_sp_t(new_sps),
indices.get());

CUDA_ERROR_CHECK(cudaGetLastError());
CUDA_ERROR_CHECK(cudaDeviceSynchronize());
TRACCC_CUDA_ERROR_CHECK(cudaGetLastError());
TRACCC_CUDA_ERROR_CHECK(cudaDeviceSynchronize());

return {kd_tree_owning_t{std::move(tree_owner)}, num_nodes,
std::move(new_sps)};
Expand Down
39 changes: 19 additions & 20 deletions device/cuda/src/seeding2/kernels/seed_finding_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,24 @@

#include <iostream>
#include <traccc/cuda/seeding2/kernels/seed_finding_kernel.hpp>
#include <traccc/cuda/seeding2/types/internal_sp.hpp>
#include <traccc/cuda/seeding2/types/kd_tree.hpp>
#include <traccc/cuda/seeding2/types/range3d.hpp>
#include <traccc/cuda/utils/definitions.hpp>
#include <traccc/cuda/utils/device_traits.hpp>
#include <traccc/cuda/utils/sort.hpp>
#include <traccc/cuda/utils/sync.hpp>
#include <traccc/edm/alt_seed.hpp>
#include <traccc/edm/seed.hpp>
#include <traccc/edm/internal_spacepoint.hpp>
#include <traccc/edm/spacepoint.hpp>
#include <traccc/seeding/detail/lin_circle.hpp>
#include <traccc/seeding/doublet_finding_helper.hpp>
#include <traccc/seeding/triplet_finding_helper.hpp>
#include "../../utils/cuda_error_handling.hpp"

#define MAX_LOWER_SP_PER_MIDDLE 100u
#define MAX_UPPER_SP_PER_MIDDLE 100u
#define KD_TREE_TRAVERSAL_STACK_SIZE 128u
#define WARPS_PER_BLOCK 8u
#define WARP_SIZE 32u

/**
* @brief Maximum difference in φ between two spacepoints adjacent in the same
Expand Down Expand Up @@ -145,8 +145,7 @@ __device__ void retrieve_from_tree(
doublet_finding_helper::isCompatible(
upper, mi,
internal_spacepoint<spacepoint>(
spacepoints[point_id].x, spacepoints[point_id].y, z,
radius, phi, spacepoints[point_id].link),
point_id, spacepoints[point_id], {0., 0.}),
finder_conf)) {
/*
* Reserve a spot in the output for this point, and
Expand Down Expand Up @@ -198,13 +197,13 @@ __device__ internal_seed make_seed(
const internal_sp_t spacepoints, const kd_tree_t tree, uint32_t lower,
uint32_t middle, internal_spacepoint<spacepoint> mi, uint32_t upper) {
const internal_spacepoint<spacepoint> lo(
spacepoints[lower].x, spacepoints[lower].y, spacepoints[lower].z,
spacepoints[lower].radius, spacepoints[lower].phi,
spacepoints[lower].link);
spacepoints[lower].link, spacepoints[lower].x, spacepoints[lower].y, spacepoints[lower].z,
spacepoints[lower].radius, spacepoints[lower].phi
);
const internal_spacepoint<spacepoint> hi(
spacepoints[upper].x, spacepoints[upper].y, spacepoints[upper].z,
spacepoints[upper].radius, spacepoints[upper].phi,
spacepoints[upper].link);
spacepoints[upper].link, spacepoints[upper].x, spacepoints[upper].y, spacepoints[upper].z,
spacepoints[upper].radius, spacepoints[upper].phi
);

/*
* Find the lin-circles for the bottom and top pair.
Expand Down Expand Up @@ -532,7 +531,7 @@ __global__ void __launch_bounds__(WARP_SIZE* WARPS_PER_BLOCK)
seed_finding_kernel(const seedfinder_config finder_conf,
const seedfilter_config filter_conf,
const internal_sp_t spacepoints, const kd_tree_t tree,
alt_seed* output_seeds, uint32_t* output_seed_size) {
seed* output_seeds, uint32_t* output_seed_size) {
cooperative_groups::thread_block block =
cooperative_groups::this_thread_block();
cooperative_groups::thread_block_tile<WARP_SIZE> warp =
Expand Down Expand Up @@ -624,7 +623,7 @@ __global__ void __launch_bounds__(WARP_SIZE* WARPS_PER_BLOCK)
* than invalid ones (if we have any).
*/
for (uint32_t i = 0; i < num_valid; ++i) {
alt_seed& s = output_seeds[idx + i];
seed& s = output_seeds[idx + i];
uint32_t j = WARP_SIZE + finder_conf.maxSeedsPerSpM - (i + 1);

/*
Expand Down Expand Up @@ -654,23 +653,23 @@ __global__ void __launch_bounds__(WARP_SIZE* WARPS_PER_BLOCK)
* @return A unique pointer to an array of internal seeds, and the size of that
* array.
*/
std::pair<vecmem::unique_alloc_ptr<alt_seed[]>, uint32_t> run_seeding(
std::pair<vecmem::unique_alloc_ptr<seed[]>, uint32_t> run_seeding(
seedfinder_config finder_conf, seedfilter_config filter_conf,
vecmem::memory_resource& mr, const internal_sp_t spacepoints,
const kd_tree_t tree) {
/*
* Allocate space for output of seeds on the device.
*/
vecmem::unique_alloc_ptr<alt_seed[]> seeds_device =
vecmem::make_unique_alloc<alt_seed[]>(
vecmem::unique_alloc_ptr<seed[]> seeds_device =
vecmem::make_unique_alloc<seed[]>(
mr, finder_conf.maxSeedsPerSpM * spacepoints.size());

/*
* Allocate space for seed count on the device.
*/
vecmem::unique_alloc_ptr<uint32_t> seed_count_device =
vecmem::make_unique_alloc<uint32_t>(mr);
CUDA_ERROR_CHECK(cudaMemset(seed_count_device.get(), 0, sizeof(uint32_t)));
TRACCC_CUDA_ERROR_CHECK(cudaMemset(seed_count_device.get(), 0, sizeof(uint32_t)));

/*
* Calculate the total amount of shared memory on top of that which is
Expand All @@ -691,14 +690,14 @@ std::pair<vecmem::unique_alloc_ptr<alt_seed[]>, uint32_t> run_seeding(
finder_conf, filter_conf, spacepoints, tree, seeds_device.get(),
seed_count_device.get());

CUDA_ERROR_CHECK(cudaGetLastError());
CUDA_ERROR_CHECK(cudaDeviceSynchronize());
TRACCC_CUDA_ERROR_CHECK(cudaGetLastError());
TRACCC_CUDA_ERROR_CHECK(cudaDeviceSynchronize());

/*
* Transfer the seed count back to the host and then hand it to the user.
*/
uint32_t seed_count_host;
CUDA_ERROR_CHECK(cudaMemcpy(&seed_count_host, seed_count_device.get(),
TRACCC_CUDA_ERROR_CHECK(cudaMemcpy(&seed_count_host, seed_count_device.get(),
sizeof(uint32_t), cudaMemcpyDeviceToHost));

return {std::move(seeds_device), seed_count_host};
Expand Down
Loading

0 comments on commit c79b097

Please sign in to comment.