updates (#17)
* updates
PhilipDeegan authored Nov 12, 2023
1 parent eb6b1a2 commit 8b44c8c
Showing 13 changed files with 274 additions and 86 deletions.
16 changes: 16 additions & 0 deletions inc/mkn/gpu/alloc.hpp
@@ -70,4 +70,20 @@ class ManagedAllocator {
}
};

template <typename T, typename Size>
void copy(T* const dst, T const* const src, Size size) {
auto dst_p = Pointer{dst};
auto src_p = Pointer{src};

bool to_send = dst_p.is_device_ptr() && src_p.is_host_ptr();
bool to_take = dst_p.is_host_ptr() && src_p.is_device_ptr();

if (to_send)
send(dst, src, size);
else if (to_take)
take(dst, src, size);
else
throw std::runtime_error("Unsupported operation (PR welcome)");
}

#endif /* _MKN_GPU_ALLOC_HPP_ */
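A minimal usage sketch of the new copy() helper: it classifies both pointers through the backend's Pointer type and dispatches to send() for host-to-device or take() for device-to-host. The namespace, the buffer names, and the assumption that both pointers are classified as expected are illustrative, not part of this diff:

#include <cstddef>
#include <stdexcept>
#include "mkn/gpu/alloc.hpp"  // header changed above; a configured backend (cuda/rocm/cpu) is assumed

// "device" and "host" are assumed to be recognised as device- and host-resident
// by the active backend's Pointer class (e.g. a device allocation and pinned/registered host memory).
void round_trip(double* device, double* host, std::size_t n) {
  mkn::gpu::copy(device, host, n);  // device dst + host src  -> send()
  mkn::gpu::copy(host, device, n);  // host dst + device src  -> take()
  // Any other residency combination throws std::runtime_error("Unsupported operation (PR welcome)").
}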
10 changes: 5 additions & 5 deletions inc/mkn/gpu/asio.hpp
@@ -134,7 +134,7 @@ struct BatchMaker {
};

template <typename F, typename... Args>
__global__ static void global_kernel(F&& f, std::uint32_t max, std::uint32_t batch_index,
__global__ static void global_kernel(F f, std::uint32_t max, std::uint32_t batch_index,
Args... args) {
if (auto bi = mkn::gpu::idx(); bi < max) f(bi + (max * batch_index), args...);
}
@@ -146,7 +146,7 @@ class Launcher {
Launcher(std::size_t tpx, std::size_t n_batches_ = 1) : Launcher{dim3(), dim3(tpx), n_batches_} {}

template <typename F, typename Batch_t>
void launch(F&& f, Batch_t& batch) {
void launch(F& f, Batch_t& batch) {
auto const& streamSize = batch.streamSize;

g.x = streamSize / b.x;
@@ -185,13 +185,13 @@ class Launcher {
}

template <typename F, typename... PArgs, typename... Args>
void _launch(F&& f, std::tuple<PArgs&...>*, Stream& stream, std::uint32_t max,
void _launch(F& f, std::tuple<PArgs&...>*, Stream& stream, std::uint32_t max,
std::uint32_t offset, Args&&... args) {
MKN_GPU_NS::launch(global_kernel<F&&, PArgs...>, g, b, ds, stream(), f, max, offset, args...);
MKN_GPU_NS::launch(global_kernel<F, PArgs...>, g, b, ds, stream(), f, max, offset, args...);
}

template <typename F, typename... Args>
void launch(Stream& stream, F&& f, std::uint32_t max, std::uint32_t offset, Args&&... args) {
void launch(Stream& stream, F& f, std::uint32_t max, std::uint32_t offset, Args&&... args) {
_launch(f,
as_values(std::forward_as_tuple(args...), std::make_index_sequence<sizeof...(Args)>()),
stream, max, offset, args...);
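For reference, global_kernel above invokes the user functor as f(bi + (max * batch_index), args...), so the functor's first parameter is the batch-global element index. A hedged sketch of that functor shape; the struct, its members, and the data layout are illustrative, and __device__ comes from the GPU toolchain (or the empty shim in cpu.hpp):

#include <cstdint>

// Functor shape expected by global_kernel: first the batch-global index,
// then the arguments forwarded through Launcher::launch.
struct Scale {
  float factor;
  __device__ void operator()(std::uint32_t i, float* data) const {
    data[i] *= factor;  // i already includes the max * batch_index offset
  }
};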
36 changes: 25 additions & 11 deletions inc/mkn/gpu/cpu.hpp
@@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "mkn/gpu/def.hpp"

#include <cassert>
#include <cstring>

#define MKN_GPU_ASSERT(x) (KASSERT((x)))
@@ -64,14 +65,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define __host__
#define __global__

namespace mkn::gpu {
#if defined(MKN_GPU_FN_PER_NS) && MKN_GPU_FN_PER_NS
namespace cpu {
#define MKN_GPU_NS mkn::gpu::cpu
#else
#define MKN_GPU_NS mkn::gpu
#endif // MKN_GPU_FN_PER_NS

namespace MKN_GPU_NS {

struct dim3 {
dim3() {}
dim3(std::size_t x) : x{x} {}
@@ -186,12 +187,15 @@ static thread_local std::size_t idx = 0;
}

template <typename F, typename... Args>
void launch(F&& f, dim3 g, dim3 b, std::size_t /*ds*/, std::size_t /*stream*/, Args&&... args) {
void launch(F f, dim3 g, dim3 b, std::size_t /*ds*/, std::size_t /*stream*/, Args&&... args) {
std::size_t N = (g.x * g.y * g.z) * (b.x * b.y * b.z);
KLOG(TRC) << N;
std::apply(
[&](auto&&... params) {
for (std::size_t i = 0; i < N; ++i) f(params...);
for (std::size_t i = 0; i < N; ++i) {
f(params...);
detail::idx++;
}
},
devmem_replace(std::forward_as_tuple(args...), std::make_index_sequence<sizeof...(Args)>()));

@@ -217,28 +221,38 @@ struct Launcher {
};

struct GLauncher : public Launcher {
GLauncher(std::size_t s, [[maybe_unused]] size_t dev = 0) : Launcher{dim3{}, dim3{}} {
GLauncher(std::size_t s, [[maybe_unused]] size_t dev = 0) : Launcher{dim3{}, dim3{}}, count{s} {
b.x = 1024;
g.x = s / b.x;
if ((s % b.x) > 0) ++g.x;
}

std::size_t count;
};

void prinfo([[maybe_unused]] std::size_t dev = 0) { KOUT(NON) << "Psuedo GPU in use"; }
void prinfo(std::size_t /*dev*/ = 0) { KOUT(NON) << "Psuedo GPU in use"; }

#if defined(MKN_GPU_FN_PER_NS) && MKN_GPU_FN_PER_NS
} /* namespace cuda */
#endif // MKN_GPU_FN_PER_NS
} /* namespace mkn::gpu */
} // namespace MKN_GPU_NS

namespace mkn::gpu::cpu {

template <typename SIZE = std::uint32_t /*max 4294967296*/>
SIZE idx() {
return MKN_GPU_NS::detail::idx++;
return MKN_GPU_NS::detail::idx;
}

} // namespace mkn::gpu::cpu

namespace MKN_GPU_NS {

template <typename F, typename... Args>
static void global_gd_kernel(F& f, std::size_t s, Args... args) {
if (auto i = mkn::gpu::cpu::idx(); i < s) f(args...);
}

#include "launchers.hpp"

} /* namespace MKN_GPU_NS */

#undef MKN_GPU_ASSERT
#endif /* _MKN_PSUEDO_GPU_HPP_ */
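With this change the CPU backend bumps detail::idx once per simulated thread inside launch(), and mkn::gpu::cpu::idx() only reads it, matching the CUDA/HIP behaviour of reporting the current thread index. A rough sketch of a launch on this pseudo-GPU path, assuming the default namespaces (MKN_GPU_FN_PER_NS unset) and that a raw pointer passes through replace()/devmem_replace() unchanged:

#include <cstdint>
#include <vector>
#include "mkn/gpu/cpu.hpp"  // pseudo-GPU backend edited above; normally selected with -DMKN_GPU_CPU

int main() {
  std::vector<std::uint32_t> out(64, 0);

  // GDLauncher (pulled in through launchers.hpp) sizes the grid from the element count;
  // global_gd_kernel guards idx() < count before calling the functor.
  mkn::gpu::GDLauncher{out.size()}(
      [](std::uint32_t* o) { o[mkn::gpu::cpu::idx()] = 1u; }, out.data());

  // Each iteration of the launch loop increments detail::idx after the call, so the
  // functor sees its own simulated thread index rather than a post-incremented counter.
}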
21 changes: 13 additions & 8 deletions inc/mkn/gpu/cuda.hpp
@@ -54,14 +54,14 @@ inline void gpuAssert(cudaError_t code, const char* file, int line, bool abort =
}
}

namespace mkn::gpu {
#if defined(MKN_GPU_FN_PER_NS) && MKN_GPU_FN_PER_NS
namespace cuda {
#define MKN_GPU_NS mkn::gpu::cuda
#else
#define MKN_GPU_NS mkn::gpu
#endif // MKN_GPU_FN_PER_NS

namespace MKN_GPU_NS {

struct Stream {
Stream() { MKN_GPU_ASSERT(result = cudaStreamCreate(&stream)); }
~Stream() { MKN_GPU_ASSERT(result = cudaStreamDestroy(stream)); }
@@ -210,7 +210,7 @@ struct Launcher {

template <typename F, typename... Args>
void operator()(F&& f, Args&&... args) {
launch(f, g, b, ds, s, args...);
launch(std::forward<F>(f), g, b, ds, s, args...);
}

size_t ds = 0 /*dynamicShared*/;
@@ -219,17 +219,25 @@
};

struct GLauncher : public Launcher {
GLauncher(std::size_t s, size_t dev = 0) : Launcher{dim3{}, dim3{}} {
GLauncher(std::size_t s, size_t dev = 0) : Launcher{dim3{}, dim3{}}, count{s} {
[[maybe_unused]] auto ret = cudaGetDeviceProperties(&devProp, dev);

b.x = devProp.maxThreadsPerBlock;
g.x = s / b.x;
if ((s % b.x) > 0) ++g.x;
}

std::size_t count = 0;
cudaDeviceProp devProp;
};

template <typename F, typename... Args>
__global__ static void global_gd_kernel(F f, std::size_t s, Args... args) {
if (auto i = mkn::gpu::cuda::idx(); i < s) f(args...);
}

#include "launchers.hpp"

template <typename T, typename V>
__global__ void _vector_fill(T* a, V t, std::size_t s) {
if (auto i = mkn::gpu::cuda::idx(); i < s) a[i] = t;
@@ -259,10 +267,7 @@ void prinfo(size_t dev = 0) {
KOUT(NON) << " threadsPBlock " << devProp.maxThreadsPerBlock;
}

#if defined(MKN_GPU_FN_PER_NS) && MKN_GPU_FN_PER_NS
} /* namespace cuda */
#endif // MKN_GPU_FN_PER_NS
} /* namespace mkn::gpu */
} // namespace MKN_GPU_NS

#undef MKN_GPU_ASSERT
#endif /* _MKN_GPU_CUDA_HPP_ */
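The new global_gd_kernel plus the shared launchers.hpp give the CUDA backend the grid-dimensioned GDLauncher, with the recorded count guarding the ragged last block. A hedged usage sketch; it assumes ManagedAllocator (alloc.hpp) lives in mkn::gpu and allocates CUDA managed (unified) memory, that a raw pointer passes through replace() unchanged, that sync() mirrors the hipDeviceSynchronize wrapper visible in rocm.hpp below, and that nvcc is run with --extended-lambda:

#include <vector>
#include "mkn/gpu/cuda.hpp"

template <typename T>
using ManagedVector = std::vector<T, mkn::gpu::ManagedAllocator<T>>;  // assumed unified memory

int main() {
  ManagedVector<float> v(1 << 20, 0.f);

  // global_gd_kernel checks mkn::gpu::cuda::idx() < count before invoking the lambda,
  // so the last partially filled block is handled automatically.
  mkn::gpu::GDLauncher{v.size()}(
      [=] __device__(float* a) { a[mkn::gpu::cuda::idx()] = 3.f; }, v.data());

  mkn::gpu::sync();  // device-wide synchronisation (assumed analogue of the HIP wrapper)
}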
57 changes: 57 additions & 0 deletions inc/mkn/gpu/launchers.hpp
@@ -0,0 +1,57 @@
/**
Copyright (c) 2020, Philip Deegan.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Philip Deegan nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _MKN_GPU_LAUNCHERS_HPP_
#define _MKN_GPU_LAUNCHERS_HPP_

struct GDLauncher : public GLauncher {
GDLauncher(std::size_t s, size_t dev = 0) : GLauncher{s, dev} {}

template <typename F, typename... Args>
auto operator()(F&& f, Args&&... args) {
_launch(std::forward<F>(f),
as_values(std::forward_as_tuple(args...), std::make_index_sequence<sizeof...(Args)>()),
count, args...);
}

protected:
template <std::size_t... I, typename... Args>
auto as_values(std::tuple<Args&...>&& tup, std::index_sequence<I...>) {
using T = std::tuple<decltype(MKN_GPU_NS::replace(std::get<I>(tup)))&...>*;
return T{nullptr};
}

template <typename F, typename... PArgs, typename... Args>
void _launch(F&& f, std::tuple<PArgs&...>*, Args&&... args) {
MKN_GPU_NS::launch(&global_gd_kernel<F, PArgs...>, g, b, ds, s, f, args...);
}
};

#endif /* _MKN_GPU_LAUNCHERS_HPP_ */
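A note on the as_values()/_launch() pair above: no tuple is ever constructed. as_values returns a null pointer whose type alone records the post-replace() parameter types, and _launch pattern-matches that type to instantiate the kernel. Below is a stand-alone illustration of the same type-only dispatch idiom, with replace() stubbed out since the real one belongs to the library:

#include <cstddef>
#include <cstdio>
#include <tuple>
#include <utility>

// Stand-in for MKN_GPU_NS::replace (the real one maps containers to device pointers, etc.).
template <typename T>
T& replace(T& t) { return t; }

template <typename... PArgs, typename... Args>
void dispatch(std::tuple<PArgs&...>*, Args&&...) {
  // PArgs is recovered purely from the pointer's type; no tuple object ever existed.
  std::printf("kernel would be instantiated for %zu parameter(s)\n", sizeof...(PArgs));
}

template <std::size_t... I, typename... Args>
auto as_values(std::tuple<Args&...>&& tup, std::index_sequence<I...>) {
  using T = std::tuple<decltype(replace(std::get<I>(tup)))&...>*;
  return T{nullptr};  // type-only payload, mirroring GDLauncher::as_values
}

template <typename... Args>
void launch_like(Args&&... args) {
  dispatch(as_values(std::forward_as_tuple(args...), std::make_index_sequence<sizeof...(Args)>()),
           std::forward<Args>(args)...);
}

int main() {
  int x = 1;
  double y = 2.0;
  launch_like(x, y);  // prints: kernel would be instantiated for 2 parameter(s)
}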
50 changes: 34 additions & 16 deletions inc/mkn/gpu/rocm.hpp
@@ -52,14 +52,14 @@ inline void gpuAssert(hipError_t code, const char* file, int line, bool abort =
}
}

namespace mkn::gpu {
#if defined(MKN_GPU_FN_PER_NS) && MKN_GPU_FN_PER_NS
namespace hip {
#define MKN_GPU_NS mkn::gpu::hip
#else
#define MKN_GPU_NS mkn::gpu
#endif // MKN_GPU_FN_PER_NS

namespace MKN_GPU_NS {

struct Stream {
Stream() { MKN_GPU_ASSERT(result = hipStreamCreate(&stream)); }
~Stream() { MKN_GPU_ASSERT(result = hipStreamDestroy(stream)); }
@@ -76,12 +76,10 @@ template <typename T>
struct Pointer {
Pointer(T* _t) : t{_t} { MKN_GPU_ASSERT(hipPointerGetAttributes(&attributes, t)); }

bool is_unregistered_ptr() const { return attributes.type == 0; }
bool is_host_ptr() const {
return attributes.type == 1 || (is_unregistered_ptr() && t != nullptr);
}
bool is_device_ptr() const { return is_managed_ptr() || attributes.type == 2; }
bool is_managed_ptr() const { return attributes.type == 3; }
// bool is_unregistered_ptr() const { return attributes.type == 0; }
bool is_host_ptr() const { return attributes.hostPointer != nullptr; }
bool is_device_ptr() const { return attributes.devicePointer != nullptr; }
bool is_managed_ptr() const { return attributes.isManaged; }

T* t;
hipPointerAttribute_t attributes;
@@ -173,7 +171,7 @@ void sync() { MKN_GPU_ASSERT(hipDeviceSynchronize()); }
#include "mkn/gpu/device.hpp"

template <typename F, typename... Args>
void launch(F f, dim3 g, dim3 b, std::size_t ds, hipStream_t& s, Args&&... args) {
void launch(F&& f, dim3 g, dim3 b, std::size_t ds, hipStream_t& s, Args&&... args) {
std::size_t N = (g.x * g.y * g.z) * (b.x * b.y * b.z);
KLOG(TRC) << N;
std::apply(
@@ -191,8 +189,8 @@ struct Launcher {
: Launcher{dim3(x / tpx, y / tpy, z / tpz), dim3(tpx, tpy, tpz)} {}

template <typename F, typename... Args>
void operator()(F f, Args&&... args) {
launch(f, g, b, ds, s, args...);
void operator()(F&& f, Args&&... args) {
launch(std::forward<F>(f), g, b, ds, s, args...);
}

size_t ds = 0 /*dynamicShared*/;
@@ -201,17 +199,40 @@
};

struct GLauncher : public Launcher {
GLauncher(std::size_t s, size_t dev = 0) : Launcher{dim3{}, dim3{}} {
GLauncher(std::size_t s, size_t dev = 0) : Launcher{dim3{}, dim3{}}, count{s} {
[[maybe_unused]] auto ret = hipGetDeviceProperties(&devProp, dev);

b.x = devProp.maxThreadsPerBlock;
g.x = s / b.x;
if ((s % b.x) > 0) ++g.x;
}

std::size_t count = 0;
hipDeviceProp_t devProp;
};

template <typename F, typename... Args>
__global__ static void global_gd_kernel(F f, std::size_t s, Args... args) {
if (auto i = mkn::gpu::hip::idx(); i < s) f(args...);
}

#include "launchers.hpp"

template <typename T, typename V>
__global__ void _vector_fill(T* a, V t, std::size_t s) {
if (auto i = mkn::gpu::hip::idx(); i < s) a[i] = t;
}

template <typename Container, typename T>
void fill(Container& c, size_t size, T val) {
GLauncher{c.size()}(_vector_fill<typename Container::value_type, T>, c.data(), val, size);
}

template <typename Container, typename T>
void fill(Container& c, T val) {
GLauncher{c.size()}(_vector_fill<typename Container::value_type, T>, c.data(), val, c.size());
}

// https://rocm-developer-tools.github.io/HIP/group__Device.html
void prinfo(size_t dev = 0) {
hipDeviceProp_t devProp;
Expand All @@ -226,10 +247,7 @@ void prinfo(size_t dev = 0) {
KOUT(NON) << " threadsPBlock " << devProp.maxThreadsPerBlock;
}

#if defined(MKN_GPU_FN_PER_NS) && MKN_GPU_FN_PER_NS
} /* namespace hip */
#endif // MKN_GPU_FN_PER_NS
} /* namespace mkn::gpu */
} // namespace MKN_GPU_NS

#undef MKN_GPU_ASSERT
#endif /* _MKN_GPU_ROCM_HPP_ */
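Alongside the attribute-based Pointer rework, the ROCm backend gains the same fill() helpers as CUDA, launching _vector_fill over the container through GLauncher. A short sketch, assuming ManagedAllocator (alloc.hpp) is visible as mkn::gpu::ManagedAllocator so the container's data pointer is valid in device code, and the default namespaces (MKN_GPU_FN_PER_NS unset):

#include <vector>
#include "mkn/gpu/rocm.hpp"

template <typename T>
using ManagedVector = std::vector<T, mkn::gpu::ManagedAllocator<T>>;  // assumed unified memory

int main() {
  ManagedVector<float> v(4096, 0.f);

  mkn::gpu::fill(v, 1.f);        // fill the whole container
  mkn::gpu::fill(v, 2048, 7.f);  // fill only the first 2048 elements

  mkn::gpu::sync();              // hipDeviceSynchronize wrapper shown above
}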
1 change: 1 addition & 0 deletions mkn.yaml
@@ -25,6 +25,7 @@ profile:
parent: headers
arg: -DMKN_GPU_CPU
test: test/any/(\w).cpp
test/cpu/(\w).cpp

- name: format
mod: |