diff --git a/nntrainer/cl_context.cpp b/nntrainer/cl_context.cpp index f606ac95ae..c835d4b2d8 100644 --- a/nntrainer/cl_context.cpp +++ b/nntrainer/cl_context.cpp @@ -32,10 +32,11 @@ std::once_flag global_cl_context_init_flag; static void add_default_object(ClContext &cc) { - FullyConnectedLayerCl::registerClKernels(); - cc.registerFactory(nntrainer::createLayer<FullyConnectedLayerCl>, - FullyConnectedLayerCl::type, - ml::train::LayerType::LAYER_FC); + if (FullyConnectedLayerCl::registerClKernels()) { + cc.registerFactory(nntrainer::createLayer<FullyConnectedLayerCl>, + FullyConnectedLayerCl::type, + ml::train::LayerType::LAYER_FC); + } // cc.registerFactory(nntrainer::createLayer<AdditionLayerCL>, // AdditionLayerCL::type, @@ -45,16 +46,19 @@ static void add_default_object(ClContext &cc) { // SwiGLULayerCl::type, // ml::train::LayerType::LAYER_SWIGLU); - ReshapeLayerCl::registerClKernels(); - cc.registerFactory(nntrainer::createLayer<ReshapeLayerCl>, - ReshapeLayerCl::type, ml::train::LayerType::LAYER_RESHAPE); + if (ReshapeLayerCl::registerClKernels()) { + cc.registerFactory(nntrainer::createLayer<ReshapeLayerCl>, + ReshapeLayerCl::type, + ml::train::LayerType::LAYER_RESHAPE); + } // cc.registerFactory(nntrainer::createLayer<RMSNormLayerCl>, // RMSNormLayerCl::type, ml::train::LayerType::LAYER_RMSNORM); - ConcatLayerCl::registerClKernels(); - cc.registerFactory(nntrainer::createLayer<ConcatLayerCl>, ConcatLayerCl::type, - ml::train::LayerType::LAYER_CONCAT); + if (ConcatLayerCl::registerClKernels()) { + cc.registerFactory(nntrainer::createLayer<ConcatLayerCl>, + ConcatLayerCl::type, ml::train::LayerType::LAYER_CONCAT); + } } static void registerer(ClContext &cc) noexcept { diff --git a/nntrainer/layers/cl_layers/reshape_cl.cpp b/nntrainer/layers/cl_layers/reshape_cl.cpp index 7698966484..6b48edb27c 100644 --- a/nntrainer/layers/cl_layers/reshape_cl.cpp +++ b/nntrainer/layers/cl_layers/reshape_cl.cpp @@ -49,6 +49,24 @@ namespace nntrainer { static constexpr size_t SINGLE_INOUT_IDX = 0; +bool ReshapeLayerCl::registerClKernels() { + + ClContext::SharedPtrClKernel kernel_copy_ptr = nullptr; + + 
kernel_copy_ptr = cl_context_ref.registerClKernel(copy_cl_kernel_, "copy_cl"); + NNTR_THROW_IF(!kernel_copy_ptr, std::runtime_error) + << "OpenCL Error: Fail to register copy_cl kernel"; + layer_kernel_ptrs.emplace_back(kernel_copy_ptr); + + kernel_copy_ptr = + cl_context_ref.registerClKernel(copy_cl_kernel_fp16_, "copy_cl_fp16"); + NNTR_THROW_IF(!kernel_copy_ptr, std::runtime_error) + << "OpenCL Error: Fail to register copy_cl_fp16 kernel"; + layer_kernel_ptrs.emplace_back(kernel_copy_ptr); + + return true; +} + void ReshapeLayerCl::finalize(InitLayerContext &context) { NNTR_THROW_IF(context.getNumInputs() != 1, std::invalid_argument) << "Reshape only supports 1 input for now"; @@ -98,9 +116,6 @@ void ReshapeLayerCl::incremental_forwarding(RunLayerContext &context, } } -opencl::Kernel ReshapeLayerCl::kernel_copy; -opencl::Kernel ReshapeLayerCl::kernel_copy_fp16; - void ReshapeLayerCl::ReshapeProcess(Tensor const &input, Tensor &output) { unsigned int input_batch_size, input_height, input_width, input_channels; @@ -136,11 +151,7 @@ void ReshapeLayerCl::copy_cl_fp16(const __fp16 *input, __fp16 *res, bool result = false; do { - ClContext::SharedPtrClKernel kernel_copy_ptr = - cl_context_ref.registerClKernel(copy_cl_kernel_fp16_, "copy_cl_fp16"); - if (!kernel_copy_ptr) { - break; - } + const auto &kernel_copy_ptr = layer_kernel_ptrs[Kernels::COPY_CL_FP16]; size_t dim_size = sizeof(__fp16) * input_batch_size * input_height * input_width * input_channels; @@ -219,11 +230,7 @@ void ReshapeLayerCl::copy_cl(const float *input, float *res, bool result = false; do { - ClContext::SharedPtrClKernel kernel_copy_ptr = - cl_context_ref.registerClKernel(copy_cl_kernel_, "copy_cl"); - if (!kernel_copy_ptr) { - break; - } + const auto &kernel_copy_ptr = layer_kernel_ptrs[Kernels::COPY_CL]; size_t dim_size = sizeof(float) * input_batch_size * input_height * input_width * input_channels; diff --git a/nntrainer/layers/cl_layers/reshape_cl.h b/nntrainer/layers/cl_layers/reshape_cl.h index 
3d19a0e0b6..6846fcef96 100644 --- a/nntrainer/layers/cl_layers/reshape_cl.h +++ b/nntrainer/layers/cl_layers/reshape_cl.h @@ -18,6 +18,7 @@ #include #include #include +#include <layer_impl_cl.h> #include #include @@ -26,10 +27,7 @@ namespace nntrainer { * @class Reshape Layer * @brief Reshape Layer */ -class ReshapeLayerCl : public Layer { - -private: - inline static ClContext cl_context_ref; +class ReshapeLayerCl : public LayerImplCl { public: /** @@ -105,9 +103,6 @@ class ReshapeLayerCl : public Layer { inline static const std::string type = "reshape"; - static opencl::Kernel kernel_copy; - static opencl::Kernel kernel_copy_fp16; - /** * @brief Process data and dimensions for reshape operation * @param[in] input Tensor */ void ReshapeProcess(Tensor const &input, Tensor &result); + /** + * @brief registerClKernels + */ + static bool registerClKernels(); + /** * @brief copy computation * @param[in] input float * for Input Tensor * @param[in] res float * for Output Tensor * @param[in] input_batch_size represents the number of samples in the input tensor * @param[in] input_channels represents the channels of the input tensor * @param[in] input_height represents the height of the input tensor * @param[in] input_width represents the width of the input tensor */ void copy_cl(const float *input, float *res, unsigned int input_batch_size, unsigned int input_channels, unsigned int input_height, unsigned int input_width); #endif -protected: std::tuple<props::TargetShape> reshape_props; /**< reshape properties : target_shape after reshape */ + + inline static std::vector<ClContext::SharedPtrClKernel> layer_kernel_ptrs; + + enum Kernels { COPY_CL, COPY_CL_FP16 }; }; } // namespace nntrainer diff --git a/test/jni/Android.mk b/test/jni/Android.mk index faaba46f45..aa9df97d48 100644 --- a/test/jni/Android.mk +++ b/test/jni/Android.mk @@ -443,13 +443,11 @@ LOCAL_SRC_FILES := \ ../unittest/layers/unittest_layers.cpp \ ../unittest/layers/unittest_layers_impl.cpp \ ../unittest/layers/unittest_layers_concat_cl.cpp \ - ../unittest/layers/unittest_layers_swiglu_cl.cpp \ ../unittest/layers/unittest_layers_fully_connected_cl.cpp \ ../unittest/layers/unittest_layers_input.cpp \ ../unittest/layers/unittest_layers_loss.cpp \ ../unittest/layers/unittest_layers_reshape_cl.cpp \ ../unittest/layers/unittest_layers_fully_connected.cpp \ - 
../unittest/layers/unittest_layers_rmsnorm_cl.cpp \ ../unittest/layers/unittest_layers_batch_normalization.cpp \ ../unittest/layers/unittest_layers_layer_normalization.cpp \ ../unittest/layers/unittest_layers_convolution2d.cpp \ @@ -458,7 +456,6 @@ LOCAL_SRC_FILES := \ ../unittest/layers/unittest_layers_flatten.cpp \ ../unittest/layers/unittest_layers_activation.cpp \ ../unittest/layers/unittest_layers_addition.cpp \ - ../unittest/layers/unittest_layers_addition_cl.cpp \ ../unittest/layers/unittest_layers_multiout.cpp \ ../unittest/layers/unittest_layers_rnn.cpp \ ../unittest/layers/unittest_layers_rnncell.cpp \