host index function
PhilipDeegan committed Oct 20, 2024
1 parent cd06ef9 commit cb24f6d
Showing 3 changed files with 81 additions and 6 deletions.
30 changes: 29 additions & 1 deletion inc/mkn/gpu/multi_launch.hpp
@@ -252,7 +252,7 @@ struct StreamGroupBarrierFunction : StreamGroupFunction<Strat> {
v.reserve(groups);
for (std::size_t i = 0; i < groups; ++i)
v.emplace_back(std::make_unique<GroupBarrier>(self, i));
-    return std::move(v);
+    return v;
}

StreamGroupBarrierFunction(std::size_t const& gs, Strat& strat)
@@ -298,6 +298,27 @@ struct StreamHostGroupMutexFunction : StreamGroupFunction<Strat> {
std::vector<std::mutex> mutices;
};

template <typename Strat, typename Fn>
struct StreamHostGroupIndexFunction : StreamGroupFunction<Strat> {
using Super = StreamGroupFunction<Strat>;
using Super::strat;

std::string_view constexpr static MOD_GROUP_ERROR =
"mkn.gpu error: StreamHostGroupIndexFunction Group size must be a divisor of datas";

StreamHostGroupIndexFunction(std::size_t const& gs, std::size_t const& gid_, Strat& strat,
Fn&& fn_)
: Super{gs, strat, StreamFunctionMode::HOST_WAIT}, fn{fn_}, gid{gid_} {}

void run(std::uint32_t const i) override {
if (i % Super::group_size == gid) fn(i);
strat.status[i] = SFS::WAIT; // done
}

Fn fn;
std::size_t const gid;
};

template <typename Datas>
struct ThreadedStreamLauncher : public StreamLauncher<Datas, ThreadedStreamLauncher<Datas>> {
using This = ThreadedStreamLauncher<Datas>;
@@ -343,6 +364,13 @@ struct ThreadedStreamLauncher : public StreamLauncher<Datas, ThreadedStreamLaunc
return *this;
}

template <typename Fn>
This& host_group_idx(std::size_t const& group_size, std::size_t const& group_idx, Fn&& fn) {
fns.emplace_back(std::make_shared<StreamHostGroupIndexFunction<This, Fn>>(
group_size, group_idx, *this, std::forward<Fn>(fn)));
return *this;
}

void operator()() { join(); }
Super& super() { return *this; }
void super(std::size_t const& idx) { return super()(idx); }
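The new `host_group_idx(group_size, group_idx, fn)` stage runs `fn` on the host only for data indices whose position within each group matches `group_idx`; every other index is immediately marked done (`SFS::WAIT`). Below is a minimal standalone sketch of that selection rule, outside the library; `run_host_group_idx` and the other names are illustrative, not part of the mkn.gpu API.

```cpp
// Standalone illustration of the index-gating rule used by host_group_idx:
// only indices whose offset within each group equals `gid` invoke the callback.
#include <cstddef>
#include <iostream>
#include <vector>

template <typename Fn>
void run_host_group_idx(std::size_t n, std::size_t group_size, std::size_t gid, Fn&& fn) {
  for (std::size_t i = 0; i < n; ++i)
    if (i % group_size == gid) fn(i);  // mirrors: if (i % Super::group_size == gid) fn(i);
}

int main() {
  std::vector<int> hits(9, 0);
  run_host_group_idx(hits.size(), /*group_size=*/3, /*gid=*/0, [&](auto i) { hits[i] = 1; });
  for (auto h : hits) std::cout << h << ' ';  // prints: 1 0 0 1 0 0 1 0 0
}
```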
9 changes: 6 additions & 3 deletions inc/mkn/gpu/rocm.hpp
@@ -45,8 +45,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// #define MKN_GPU_ASSERT(x) (KASSERT((x) == hipSuccess))

-#define MKN_GPU_ASSERT(ans) \
-  { gpuAssert((ans), __FILE__, __LINE__); }
+#define MKN_GPU_ASSERT(ans)               \
+  {                                       \
+    gpuAssert((ans), __FILE__, __LINE__); \
+  }
inline void gpuAssert(hipError_t code, const char* file, int line, bool abort = true) {
if (code != hipSuccess) {
fprintf(stderr, "GPUassert: %s %s %d\n", hipGetErrorString(code), file, line);
@@ -203,8 +205,9 @@ void alloc_host(T*& p, Size size) {

template <typename T, typename Size>
void alloc_managed(T*& p, Size size) {
+  auto const bytes = size * sizeof(T);
   KLOG(TRC) << "GPU alloced: " << size * sizeof(T);
-  MKN_GPU_ASSERT(hipMallocManaged((void**)&p, size * sizeof(T)));
+  MKN_GPU_ASSERT(hipMallocManaged((void**)&p, bytes));
}

void inline destroy(void* p) {
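The rocm.hpp changes are small refactors: `MKN_GPU_ASSERT` keeps the same behaviour but is reshaped into a multi-line block, and `alloc_managed` now computes the byte count once as `bytes` instead of repeating `size * sizeof(T)`. For illustration, here is the wrap-and-check pattern the macro follows, with a stand-in error enum in place of `hipError_t` so the sketch builds without HIP; the abort-on-error behaviour shown is an assumption, not taken from this file.

```cpp
// Sketch of the assert-wrapper pattern behind MKN_GPU_ASSERT (illustrative only).
#include <cstdio>
#include <cstdlib>

enum fake_err_t { fakeSuccess = 0, fakeErrorOOM = 2 };  // stand-in for hipError_t

inline void gpuAssert(fake_err_t code, const char* file, int line, bool abort = true) {
  if (code != fakeSuccess) {
    std::fprintf(stderr, "GPUassert: error %d %s %d\n", int(code), file, line);
    if (abort) std::exit(code);  // assumed policy, not necessarily what mkn.gpu does
  }
}

#define MY_GPU_ASSERT(ans)                \
  {                                       \
    gpuAssert((ans), __FILE__, __LINE__); \
  }

int main() {
  MY_GPU_ASSERT(fakeSuccess);      // passes silently
  // MY_GPU_ASSERT(fakeErrorOOM);  // would report this file and line, then exit
}
```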
48 changes: 46 additions & 2 deletions test/any/async_streaming.cpp
@@ -151,8 +151,52 @@ std::uint32_t test_threaded_host_group_mutex(std::size_t const& nthreads = 2) {
return 0;
}

std::uint32_t test_threaded_host_group_idx(std::size_t const& nthreads = 2) {
using T = double;
KUL_DBG_FUNC_ENTER;

std::size_t constexpr group_size = 3;

std::vector<ManagedVector<T>> vecs(C + 1, ManagedVector<T>(NUM, 0));
for (std::size_t i = 0; i < vecs.size(); ++i) std::fill_n(vecs[i].data(), NUM, i);

ManagedVector<T*> datas(C + 1);
for (std::size_t i = 0; i < vecs.size(); ++i) datas[i] = vecs[i].data();
auto views = datas.data();

ThreadedStreamLauncher{vecs, nthreads}
.dev([=] __device__(auto const& i) { views[i][mkn::gpu::idx()] += 1; })
.host([&](auto i) mutable {
std::this_thread::sleep_for(200ms);
for (auto& e : vecs[i]) e += 1;
})
.host_group_idx(group_size, 0,
[&](auto const i) {
for (auto& e : vecs[i]) e += 1;
})
.dev([=] __device__(auto const& i) { views[i][mkn::gpu::idx()] += 3; })();

std::size_t val = 5;
for (std::size_t i = 0; i < vecs.size(); i++) {
if (i % group_size == 0) {
for (auto const& e : vecs[i])
if (e != val + 1) return 1;
} else {
for (auto const& e : vecs[i])
if (e != val) return 1;
}
++val;
};

return 0;
}

int main() {
KOUT(NON) << __FILE__;
-  return test() + test_threaded() + test_threaded(6) + test_threaded_group_barrier() +
-         test_threaded_host_group_mutex();
+  return test()                              //
+         + test_threaded()                   //
+         + test_threaded(6)                  //
+         + test_threaded_group_barrier()     //
+         + test_threaded_host_group_mutex()  //
+         + test_threaded_host_group_idx();
}
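For reference, the values checked by `test_threaded_host_group_idx` follow from the pipeline above: each `vecs[i]` starts filled with `i`, then gains +1 (first device stage), +1 (host stage), +3 (second device stage), plus an extra +1 from the `host_group_idx` stage when `i % group_size == 0`. A tiny sketch of that arithmetic (illustrative only, with a fixed element count):

```cpp
// Expected per-vector value in test_threaded_host_group_idx:
// i + 1 (dev) + 1 (host) + 3 (dev), plus 1 more when i % group_size == 0.
#include <cstddef>
#include <cstdio>

int main() {
  constexpr std::size_t group_size = 3;
  for (std::size_t i = 0; i < 6; ++i) {
    double expected = i + 1 + 1 + 3 + (i % group_size == 0 ? 1 : 0);
    std::printf("vecs[%zu] -> %.0f\n", i, expected);  // 6, 6, 7, 9, 9, 10
  }
}
```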
