Skip to content

Commit

Permalink
cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
PhilipDeegan committed Nov 12, 2024
1 parent 47e58b6 commit d88605d
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 121 deletions.
20 changes: 9 additions & 11 deletions inc/mkn/gpu/cpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,9 @@ struct dim3 {
std::size_t x = 1, y = 1, z = 1;
};

void setLimitMallocHeapSize(std::size_t const& /*bytes*/) {} /*noop*/
void inline setLimitMallocHeapSize(std::size_t const& /*bytes*/) {} /*noop*/

void setDevice(std::size_t const& /*dev*/) {} /*noop*/

auto supportsCooperativeLaunch(int const /*dev*/ = 0) {
int supportsCoopLaunch = 0;
return supportsCoopLaunch;
}
void inline setDevice(std::size_t const& /*dev*/) {} /*noop*/

struct Stream {
Stream() {}
Expand Down Expand Up @@ -217,7 +212,7 @@ namespace detail {
static thread_local std::size_t idx = 0;
}

template <bool _sync = true, bool _coop = false, typename F, typename... Args>
template <bool _sync = true, typename F, typename... Args>
void launch(F f, dim3 g, dim3 b, std::size_t /*ds*/, std::size_t /*stream*/, Args&&... args) {
std::size_t N = (g.x * g.y * g.z) * (b.x * b.y * b.z);
KLOG(TRC) << N;
Expand Down Expand Up @@ -261,7 +256,7 @@ struct GLauncher : public Launcher {
std::size_t count;
};

void prinfo(std::size_t /*dev*/ = 0) { KOUT(NON) << "Psuedo GPU in use"; }
void inline prinfo(std::size_t /*dev*/ = 0) { KOUT(NON) << "Psuedo GPU in use"; }

} // namespace MKN_GPU_NS

Expand All @@ -281,9 +276,12 @@ static void global_gd_kernel(F& f, std::size_t s, Args... args) {
if (auto i = mkn::gpu::cpu::idx(); i < s) f(args...);
}

#include "launchers.hpp"
template <typename F, typename... Args>
static void global_d_kernel(F& f, std::size_t s, Args... args) {
f(args...);
}

void grid_sync() {}
#include "launchers.hpp"

} /* namespace MKN_GPU_NS */

Expand Down
37 changes: 12 additions & 25 deletions inc/mkn/gpu/cuda.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "mkn/kul/tuple.hpp"
#include "mkn/kul/assert.hpp"

#include "mkn/gpu/def.hpp"
#include "mkn/gpu/cli.hpp"

#include <cuda_runtime.h>
#include <cooperative_groups.h>
#include "mkn/gpu/def.hpp"

//

Expand Down Expand Up @@ -86,17 +86,11 @@ __device__ SIZE block_idx_x() {

namespace MKN_GPU_NS {

void setLimitMallocHeapSize(std::size_t const& bytes) {
void inline setLimitMallocHeapSize(std::size_t const& bytes) {
MKN_GPU_ASSERT(cudaDeviceSetLimit(cudaLimitMallocHeapSize, bytes));
}

void setDevice(std::size_t const& dev) { MKN_GPU_ASSERT(cudaSetDevice(dev)); }

auto supportsCooperativeLaunch(int const dev = 0) {
int supportsCoopLaunch = 0;
MKN_GPU_ASSERT(cudaDeviceGetAttribute(&supportsCoopLaunch, cudaDevAttrCooperativeLaunch, dev));
return supportsCoopLaunch;
}
void inline setDevice(std::size_t const& dev) { MKN_GPU_ASSERT(cudaSetDevice(dev)); }

struct Stream {
Stream() { MKN_GPU_ASSERT(result = cudaStreamCreate(&stream)); }
Expand Down Expand Up @@ -249,19 +243,13 @@ void inline sync(cudaStream_t stream) { MKN_GPU_ASSERT(cudaStreamSynchronize(str
#include "mkn/gpu/alloc.hpp"
#include "mkn/gpu/device.hpp"

template <bool _sync = true, bool _coop = false, typename F, typename... Args>
template <bool _sync = true, typename F, typename... Args>
void launch(F&& f, dim3 g, dim3 b, std::size_t ds, cudaStream_t& s, Args&&... args) {
std::size_t N = (g.x * g.y * g.z) * (b.x * b.y * b.z);
KLOG(TRC) << N;
std::apply(
[&](auto&&... params) {
if constexpr (_coop) {
auto address_of = [](auto& a) { return (void*)&a; };
void* kernelArgs[] = {(address_of(params), ...)};
cudaLaunchCooperativeKernel((void*)f, g, b, kernelArgs, ds);
} else {
f<<<g, b, ds, s>>>(params...);
}
f<<<g, b, ds, s>>>(params...);
MKN_GPU_ASSERT(cudaGetLastError());
},
devmem_replace(std::forward_as_tuple(args...), std::make_index_sequence<sizeof...(Args)>()));
Expand Down Expand Up @@ -316,6 +304,11 @@ __global__ static void global_gd_kernel(F f, std::size_t s, Args... args) {
if (auto i = mkn::gpu::cuda::idx(); i < s) f(args...);
}

template <typename F, typename... Args>
__global__ static void global_d_kernel(F f, std::size_t s, Args... args) {
f(args...);
}

#include "launchers.hpp"

template <typename T, typename V>
Expand Down Expand Up @@ -358,14 +351,8 @@ void print_gpu_mem_used() {
total_t, total_m, used_m);
}

__device__ void grid_sync() {
namespace cg = cooperative_groups;
cg::grid_group grid = cg::this_grid();
assert(grid.is_valid());
grid.sync();
}

} // namespace MKN_GPU_NS

#undef MKN_GPU_ASSERT

#endif /* _MKN_GPU_CUDA_HPP_ */
17 changes: 11 additions & 6 deletions inc/mkn/gpu/defines.hpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
#ifndef _MKN_GPU_DEFINES_HPP_
#define _MKN_GPU_DEFINES_HPP_

#if !defined(MKN_GPU_FN_PER_NS)
#define MKN_GPU_FN_PER_NS 0
#endif

#if !defined(MKN_GPU_ROCM) and __has_include("hip/hip_runtime.h")
#define MKN_GPU_ROCM 1
#endif
Expand All @@ -19,18 +15,27 @@
#define MKN_GPU_CUDA 0
#endif

#if MKN_GPU_CUDA == 1 && MKN_GPU_ROCM == 1 && MKN_GPU_FN_PER_NS == 0
#if MKN_GPU_CUDA == 1 && MKN_GPU_ROCM == 1 && !defined(MKN_GPU_FN_PER_NS)
#define MKN_GPU_FN_PER_NS 1
#endif

#if !defined(MKN_GPU_FN_PER_NS)
#define MKN_GPU_FN_PER_NS 0
#endif

#if MKN_GPU_ROCM == 1
#include "mkn/gpu/rocm.hpp"
#endif

#if MKN_GPU_CUDA
#if MKN_GPU_CUDA == 1
#include "mkn/gpu/cuda.hpp"
#endif

#if MKN_GPU_CUDA == 0 && MKN_GPU_ROCM == 0 && !defined(MKN_GPU_CPU)
#pragma message("mkn.gpu error: No accelerator found, defaulting to CPU IMP")
#define MKN_GPU_CPU 1
#endif

#if MKN_GPU_FN_PER_NS == 1 || MKN_GPU_CPU == 1
#include "mkn/gpu/cpu.hpp"
#endif
Expand Down
18 changes: 9 additions & 9 deletions inc/mkn/gpu/launchers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef _MKN_GPU_LAUNCHERS_HPP_
#define _MKN_GPU_LAUNCHERS_HPP_

template <bool _sync = true, bool _coop = false>
template <bool _sync = true>
struct GDLauncher : public GLauncher {
GDLauncher(std::size_t s, size_t dev = 0) : GLauncher{s, dev} {}

Expand All @@ -58,27 +58,26 @@ struct GDLauncher : public GLauncher {

template <typename S, typename F, typename... PArgs, typename... Args>
void _launch(S& _s, F& f, std::tuple<PArgs&...>*, Args&&... args) {
MKN_GPU_NS::launch<_sync, _coop>(&global_gd_kernel<F, PArgs...>, g, b, ds, _s, f, args...);
MKN_GPU_NS::launch<_sync>(&global_gd_kernel<F, PArgs...>, g, b, ds, _s, f, args...);
}
};


template <bool _sync = false, bool _coop = false>
template <bool _sync = false>
struct DLauncher : public Launcher {
DLauncher(size_t dev = 0) : Launcher{{}, {}} {}
DLauncher(size_t /*dev*/ = 0) : Launcher{{}, {}} {}

template <typename F, typename... Args>
auto operator()(F&& f, Args&&... args) {
_launch(s, f,
as_values(std::forward_as_tuple(args...), std::make_index_sequence<sizeof...(Args)>()),
args...);
args...);
}

template <typename F, typename... Args>
auto stream(Stream& s, F&& f, Args&&... args) {
_launch(s.stream, f,
as_values(std::forward_as_tuple(args...), std::make_index_sequence<sizeof...(Args)>()),
args...);
args...);
}

protected:
Expand All @@ -90,9 +89,10 @@ struct DLauncher : public Launcher {

template <typename S, typename F, typename... PArgs, typename... Args>
void _launch(S& _s, F& f, std::tuple<PArgs&...>*, Args&&... args) {
MKN_GPU_NS::launch<_sync, _coop>(&global_d_kernel<F, PArgs...>, g, b, ds, _s, f, args...);
MKN_GPU_NS::launch<_sync>(&global_d_kernel<F, PArgs...>, g, b, ds, _s, f, args...);
}
};

//
};

#endif /* _MKN_GPU_LAUNCHERS_HPP_ */
36 changes: 7 additions & 29 deletions inc/mkn/gpu/rocm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "mkn/kul/tuple.hpp"
#include "mkn/kul/assert.hpp"

#include "mkn/gpu/cli.hpp"
#include "hip/hip_runtime.h"
#include "mkn/gpu/def.hpp"
#include "mkn/gpu/cli.hpp"

#include "hip/hip_cooperative_groups.h"

// #define MKN_GPU_ASSERT(x) (KASSERT((x) == hipSuccess))
#include "hip/hip_runtime.h"

#define MKN_GPU_ASSERT(ans) \
{ \
Expand Down Expand Up @@ -81,18 +78,11 @@ __device__ SIZE idx() {

namespace MKN_GPU_NS {

void setLimitMallocHeapSize(std::size_t const& bytes) {
void inline setLimitMallocHeapSize(std::size_t const& bytes) {
MKN_GPU_ASSERT(hipDeviceSetLimit(hipLimitMallocHeapSize, bytes));
}

void setDevice(std::size_t const& dev) { MKN_GPU_ASSERT(hipSetDevice(dev)); }

auto supportsCooperativeLaunch(int const dev = 0) {
int supportsCoopLaunch = 0;
MKN_GPU_ASSERT(
hipDeviceGetAttribute(&supportsCoopLaunch, hipDeviceAttributeCooperativeLaunch, dev));
return supportsCoopLaunch;
}
void inline setDevice(std::size_t const& dev) { MKN_GPU_ASSERT(hipSetDevice(dev)); }

struct Stream {
Stream() { MKN_GPU_ASSERT(result = hipStreamCreate(&stream)); }
Expand Down Expand Up @@ -259,19 +249,13 @@ void inline sync(hipStream_t stream) { MKN_GPU_ASSERT(hipStreamSynchronize(strea
#include "mkn/gpu/alloc.hpp"
#include "mkn/gpu/device.hpp"

template <bool _sync = true, bool _coop = false, typename F, typename... Args>
template <bool _sync = true, typename F, typename... Args>
void launch(F&& f, dim3 g, dim3 b, std::size_t ds, hipStream_t& s, Args&&... args) {
std::size_t N = (g.x * g.y * g.z) * (b.x * b.y * b.z);
KLOG(TRC) << N;
std::apply(
[&](auto&&... params) {
if constexpr (_coop) {
auto address_of = [](auto& a) { return (void*)&a; };
void* kernelArgs[] = {(address_of(params), ...)};
MKN_GPU_ASSERT(hipLaunchCooperativeKernel<F>(f, g, b, kernelArgs, ds, s));
} else {
hipLaunchKernelGGL(f, g, b, ds, s, params...);
}
hipLaunchKernelGGL(f, g, b, ds, s, params...);
MKN_GPU_ASSERT(hipGetLastError());
},
devmem_replace(std::forward_as_tuple(args...), std::make_index_sequence<sizeof...(Args)>()));
Expand Down Expand Up @@ -367,14 +351,8 @@ void print_gpu_mem_used() {
total_t, total_m, used_m);
}

__device__ void grid_sync() {
namespace cg = cooperative_groups;
cg::grid_group grid = cg::this_grid();
assert(grid.is_valid());
grid.sync();
}

} // namespace MKN_GPU_NS

#undef MKN_GPU_ASSERT

#endif /* _MKN_GPU_ROCM_HPP_ */
41 changes: 0 additions & 41 deletions test/any/coop.cpp

This file was deleted.

0 comments on commit d88605d

Please sign in to comment.