Skip to content

Commit

Permalink
[ GPU/OpenCL ] Split register kernel from forwarding function
Browse files Browse the repository at this point in the history
- This commit is a draft.
- This commit splits kernel registration from the forwarding function.
- This is a work in progress (WIP). It contains example updates for concat_cl
and fc_layer_cl.

Self evaluation:

Build test: [X]Passed [ ]Failed [ ]Skipped
Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Eunju Yang <[email protected]>
  • Loading branch information
EunjuYang committed Nov 5, 2024
1 parent b1a3c75 commit 0935dbc
Show file tree
Hide file tree
Showing 6 changed files with 162 additions and 63 deletions.
12 changes: 8 additions & 4 deletions nntrainer/cl_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ std::once_flag global_cl_context_init_flag;

static void add_default_object(ClContext &cc) {

FullyConnectedLayerCl::registerClKernels();
cc.registerFactory(nntrainer::createLayer<FullyConnectedLayerCl>,
FullyConnectedLayerCl::type,
ml::train::LayerType::LAYER_FC);
Expand All @@ -40,15 +41,18 @@ static void add_default_object(ClContext &cc) {
// AdditionLayerCL::type,
// ml::train::LayerType::LAYER_ADDITION);

cc.registerFactory(nntrainer::createLayer<SwiGLULayerCl>, SwiGLULayerCl::type,
ml::train::LayerType::LAYER_SWIGLU);
// cc.registerFactory(nntrainer::createLayer<SwiGLULayerCl>,
// SwiGLULayerCl::type,
// ml::train::LayerType::LAYER_SWIGLU);

ReshapeLayerCl::registerClKernels();
cc.registerFactory(nntrainer::createLayer<ReshapeLayerCl>,
ReshapeLayerCl::type, ml::train::LayerType::LAYER_RESHAPE);

cc.registerFactory(nntrainer::createLayer<RMSNormLayerCl>,
RMSNormLayerCl::type, ml::train::LayerType::LAYER_RMSNORM);
// cc.registerFactory(nntrainer::createLayer<RMSNormLayerCl>,
// RMSNormLayerCl::type, ml::train::LayerType::LAYER_RMSNORM);

ConcatLayerCl::registerClKernels();
cc.registerFactory(nntrainer::createLayer<ConcatLayerCl>, ConcatLayerCl::type,
ml::train::LayerType::LAYER_CONCAT);
}
Expand Down
100 changes: 56 additions & 44 deletions nntrainer/layers/cl_layers/concat_cl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,12 +233,54 @@ std::string concat_cl_axis1_kernel_ =
})";

namespace nntrainer {
ConcatLayerCl::ConcatLayerCl() : Layer() {}
ConcatLayerCl::ConcatLayerCl() : LayerImplCl() {}

static constexpr size_t SINGLE_INOUT_IDX = 0;
static constexpr size_t INPUT_IDX_1 = 0;
static constexpr size_t INPUT_IDX_2 = 1;

bool ConcatLayerCl::registerClKernels() {
ClContext::SharedPtrClKernel kernel_concat_ptr = nullptr;

kernel_concat_ptr =
cl_context_ref.registerClKernel(concat_cl_axis1_kernel_, "concat_cl_axis1");
NNTR_THROW_IF(!kernel_concat_ptr, std::runtime_error)
<< "OpenCL Error: Fail to register concat_cl_axis1 kernel";
layer_kernel_ptrs.emplace_back(kernel_concat_ptr);

kernel_concat_ptr =
cl_context_ref.registerClKernel(concat_cl_axis2_kernel_, "concat_cl_axis2");
NNTR_THROW_IF(!kernel_concat_ptr, std::runtime_error)
<< "OpenCL Error: Fail to register concat_cl_axis2 kernel";
layer_kernel_ptrs.emplace_back(kernel_concat_ptr);

kernel_concat_ptr =
cl_context_ref.registerClKernel(concat_cl_axis3_kernel_, "concat_cl_axis3");
NNTR_THROW_IF(!kernel_concat_ptr, std::runtime_error)
<< "OpenCL Error: Fail to register concat_cl_axis3 kernel";
layer_kernel_ptrs.emplace_back(kernel_concat_ptr);

kernel_concat_ptr = cl_context_ref.registerClKernel(
concat_cl_axis1_kernel_fp16_, "concat_cl_axis1_fp16");
NNTR_THROW_IF(!kernel_concat_ptr, std::runtime_error)
<< "OpenCL Error: Fail to register concat_cl_axis1_fp16 kernel";
layer_kernel_ptrs.emplace_back(kernel_concat_ptr);

kernel_concat_ptr = cl_context_ref.registerClKernel(
concat_cl_axis2_kernel_fp16_, "concat_cl_axis2_fp16");
NNTR_THROW_IF(!kernel_concat_ptr, std::runtime_error)
<< "OpenCL Error: Fail to register concat_cl_axis2_fp16 kernel";
layer_kernel_ptrs.emplace_back(kernel_concat_ptr);

kernel_concat_ptr = cl_context_ref.registerClKernel(
concat_cl_axis3_kernel_fp16_, "concat_cl_axis3_fp16");
NNTR_THROW_IF(!kernel_concat_ptr, std::runtime_error)
<< "OpenCL Error: Fail to register concat_cl_axis3_fp16 kernel";
layer_kernel_ptrs.emplace_back(kernel_concat_ptr);

return true;
}

void ConcatLayerCl::finalize(InitLayerContext &context) {
auto &concat_dimension_prop = std::get<props::ConcatDimension>(concat_props);
/** for backward compatibility, default concat dimension will be channel */
Expand Down Expand Up @@ -302,13 +344,6 @@ void ConcatLayerCl::incremental_forwarding(RunLayerContext &context,
ConcatProcess(in1, in2, out);
}

opencl::Kernel ConcatLayerCl::kernel_concat_axis3;
opencl::Kernel ConcatLayerCl::kernel_concat_axis3_fp16;
opencl::Kernel ConcatLayerCl::kernel_concat_axis2;
opencl::Kernel ConcatLayerCl::kernel_concat_axis2_fp16;
opencl::Kernel ConcatLayerCl::kernel_concat_axis1;
opencl::Kernel ConcatLayerCl::kernel_concat_axis1_fp16;

void ConcatLayerCl::ConcatProcess(Tensor const &in1, Tensor const &in2,
Tensor &result) {

Expand Down Expand Up @@ -375,12 +410,8 @@ void ConcatLayerCl::concat_cl_axis3(const float *matAdata,
bool result = false;

do {
ClContext::SharedPtrClKernel kernel_concat_ptr =
cl_context_ref.registerClKernel(concat_cl_axis3_kernel_,
"concat_cl_axis3");
if (!kernel_concat_ptr) {
break;
}

const auto &kernel_concat_ptr = layer_kernel_ptrs[Kernels::CONCAT_CL_AXIS3];

int dim = int(input1_batch_size * input1_channels * input1_height *
(input1_width + input2_width));
Expand Down Expand Up @@ -486,12 +517,9 @@ void ConcatLayerCl::concat_cl_axis3_fp16(
bool result = false;

do {
ClContext::SharedPtrClKernel kernel_concat_ptr =
cl_context_ref.registerClKernel(concat_cl_axis3_kernel_fp16_,
"concat_cl_axis3_fp16");
if (!kernel_concat_ptr) {
break;
}

const auto &kernel_concat_ptr =
layer_kernel_ptrs[Kernels::CONCAT_CL_AXIS3_FP16];

int dim = int(input1_batch_size * input1_channels * input1_height *
(input1_width + input2_width));
Expand Down Expand Up @@ -599,12 +627,8 @@ void ConcatLayerCl::concat_cl_axis2(const float *matAdata,
bool result = false;

do {
ClContext::SharedPtrClKernel kernel_concat_ptr =
cl_context_ref.registerClKernel(concat_cl_axis2_kernel_,
"concat_cl_axis2");
if (!kernel_concat_ptr) {
break;
}

const auto &kernel_concat_ptr = layer_kernel_ptrs[Kernels::CONCAT_CL_AXIS2];

int dim = int(input1_batch_size * input1_channels * input1_width *
(input1_height + input2_height));
Expand Down Expand Up @@ -710,12 +734,8 @@ void ConcatLayerCl::concat_cl_axis2_fp16(
bool result = false;

do {
ClContext::SharedPtrClKernel kernel_concat_ptr =
cl_context_ref.registerClKernel(concat_cl_axis2_kernel_fp16_,
"concat_cl_axis2_fp16");
if (!kernel_concat_ptr) {
break;
}
const auto &kernel_concat_ptr =
layer_kernel_ptrs[Kernels::CONCAT_CL_AXIS2_FP16];

int dim = int(input1_batch_size * input1_channels * input1_width *
(input1_height + input2_height));
Expand Down Expand Up @@ -823,12 +843,7 @@ void ConcatLayerCl::concat_cl_axis1(const float *matAdata,
bool result = false;

do {
ClContext::SharedPtrClKernel kernel_concat_ptr =
cl_context_ref.registerClKernel(concat_cl_axis1_kernel_,
"concat_cl_axis1");
if (!kernel_concat_ptr) {
break;
}
const auto &kernel_concat_ptr = layer_kernel_ptrs[Kernels::CONCAT_CL_AXIS1];

int dim = int(input1_batch_size * input1_width * input1_height *
(input1_channels + input2_channels));
Expand Down Expand Up @@ -934,12 +949,9 @@ void ConcatLayerCl::concat_cl_axis1_fp16(
bool result = false;

do {
ClContext::SharedPtrClKernel kernel_concat_ptr =
cl_context_ref.registerClKernel(concat_cl_axis1_kernel_fp16_,
"concat_cl_axis1_fp16");
if (!kernel_concat_ptr) {
break;
}

const auto &kernel_concat_ptr =
layer_kernel_ptrs[Kernels::CONCAT_CL_AXIS1_FP16];

int dim = int(input1_batch_size * input1_width * input1_height *
(input1_channels + input2_channels));
Expand Down
31 changes: 19 additions & 12 deletions nntrainer/layers/cl_layers/concat_cl.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#include <common_properties.h>
#include <layer_context.h>
#include <layer_devel.h>
#include <layer_impl.h>
#include <layer_impl_cl.h>
#include <opencl_buffer.h>
#include <opencl_kernel.h>
#include <tensor_dim.h>
Expand All @@ -31,10 +31,7 @@ namespace nntrainer {
* @class Concat Layer
* @brief Concat Layer
*/
class ConcatLayerCl : public Layer {

private:
inline static ClContext cl_context_ref;
class ConcatLayerCl : public LayerImplCl {

public:
/**
Expand Down Expand Up @@ -104,14 +101,12 @@ class ConcatLayerCl : public Layer {
*/
void setProperty(const std::vector<std::string> &values) override;

inline static const std::string type = "concat";
/**
* @brief registerClKernels
*/
static bool registerClKernels();

static opencl::Kernel kernel_concat_axis3;
static opencl::Kernel kernel_concat_axis3_fp16;
static opencl::Kernel kernel_concat_axis2;
static opencl::Kernel kernel_concat_axis2_fp16;
static opencl::Kernel kernel_concat_axis1;
static opencl::Kernel kernel_concat_axis1_fp16;
inline static const std::string type = "concat";

/**
* @brief Process data and dimensions for concat
Expand Down Expand Up @@ -233,6 +228,18 @@ class ConcatLayerCl : public Layer {
#endif
private:
std::tuple<props::ConcatDimension> concat_props;

inline static std::vector<ClContext::SharedPtrClKernel>
layer_kernel_ptrs; /** kernel list relevant with this layer */

enum Kernels {
CONCAT_CL_AXIS1,
CONCAT_CL_AXIS2,
CONCAT_CL_AXIS3,
CONCAT_CL_AXIS1_FP16,
CONCAT_CL_AXIS2_FP16,
CONCAT_CL_AXIS3_FP16,
};
};

} // namespace nntrainer
Expand Down
2 changes: 1 addition & 1 deletion nntrainer/layers/cl_layers/fc_layer_cl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ static constexpr size_t SINGLE_INOUT_IDX = 0;
enum FCParams { weight, bias };

FullyConnectedLayerCl::FullyConnectedLayerCl() :
LayerImpl(), fc_props(props::Unit()) {
LayerImplCl(), fc_props(props::Unit()) {
weight_idx.fill(std::numeric_limits<unsigned>::max());
}

Expand Down
11 changes: 9 additions & 2 deletions nntrainer/layers/cl_layers/fc_layer_cl.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,15 @@
#ifdef __cplusplus

#include <common_properties.h>
#include <layer_impl.h>
#include <layer_impl_cl.h>

namespace nntrainer {

/**
* @class FullyConnectedLayerCl
* @brief fully connected layer
*/
class FullyConnectedLayerCl : public LayerImpl {
class FullyConnectedLayerCl : public LayerImplCl {
public:
/**
* @brief Constructor of Fully Connected Layer
Expand Down Expand Up @@ -101,12 +101,19 @@ class FullyConnectedLayerCl : public LayerImpl {
*/
void setProperty(const std::vector<std::string> &values) override;

/**
 * @brief Register the OpenCL kernels owned by this layer.
 * @return always true; this implementation registers nothing itself
 */
static bool registerClKernels() {
  return true;
}

inline static const std::string type = "fully_connected";

private:
std::tuple<props::Unit>
fc_props; /**< fc layer properties : unit - number of output neurons */
std::array<unsigned int, 2> weight_idx; /**< indices of the weights */

const static int num_layer_kernels = 0; /** < number of layer kernels */

static std::vector<ClContext::SharedPtrClKernel>
layer_kernel_ptrs; /**< kernel list relevant with this layer */
};
} // namespace nntrainer

Expand Down
69 changes: 69 additions & 0 deletions nntrainer/layers/cl_layers/layer_impl_cl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (C) 2024 Eunju Yang <[email protected]>
*
* @file layer_impl_cl.h
* @date 04 Nov 2024
* @brief This is base Layer implementation class for OpenCL
* @see https://github.com/nnstreamer/nntrainer
* @author Eunju Yang <[email protected]>
* @bug No known bugs except for NYI items
*
* @details LayerImplCl forms the base class for all the OpenCL layers with
* weights and bias parameters. LayerImpl provides parsing of properties like
* weight/bias initializers and regularizers. LayerImpl also provides checks for
* double calls to the finalize function. This is a wrapper class of LayerImpl
* for OpenCL.
*/
#ifndef __LAYER_IMPL_CL_H__
#define __LAYER_IMPL_CL_H__
#ifdef __cplusplus

#include <cl_context.h>
#include <layer_impl.h>

namespace nntrainer {

/**
* @class LayerImplCl
* @brief Base class for OpenCL layers. Extends LayerImpl with a static
* ClContext member and a static kernel-registration entry point.
*/
class LayerImplCl : public LayerImpl {

public:
/**
* @brief Default constructor; only invokes the LayerImpl constructor.
*/
LayerImplCl() : LayerImpl(){};

/**
* @brief Destructor of LayerImplCl (virtual, defaulted).
*/
virtual ~LayerImplCl() = default;

/**
* @brief Move constructor of LayerImplCl (defaulted, noexcept).
* @param[in] rhs LayerImplCl to be moved.
*/
LayerImplCl(LayerImplCl &&rhs) noexcept = default;

/**
* @brief Move assignment operator (defaulted).
* @param[in] rhs LayerImplCl to be moved.
* @return reference to this object
*/
LayerImplCl &operator=(LayerImplCl &&rhs) = default;

/**
* @brief Register the OpenCL kernels for this layer.
* registerClKernels() is called in global ClContext.
* @note Only declared here; no definition appears in this header. Static
* functions cannot be virtual, so a derived layer that declares its own
* static registerClKernels() hides this one rather than overriding it.
* @return presumably true on success -- TODO confirm against the definition
*/
static bool registerClKernels();

protected:
/**
* Single static ClContext object, visible to all derived layers.
* NOTE(review): this is a default-constructed ClContext; whether it refers to
* the same state as the global ClContext is not visible here -- confirm.
*/
inline static ClContext cl_context_ref;
};

} // namespace nntrainer

#endif /** __cplusplus */
#endif /** LAYER_IMPL_CL */

0 comments on commit 0935dbc

Please sign in to comment.