[Layer] Modify Layer for mixed type
The fc, conv2d, softmax, and pooling layers are modified for mixed type.
In/Out tensors are supported as 32- or 16-bit float, and so are the weights.

Signed-off-by: Jiho Chu <[email protected]>
jihochu committed Feb 21, 2024
1 parent 135c6de commit 2ff98ba
Showing 6 changed files with 577 additions and 232 deletions.
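
The conv2d diff below illustrates the dispatch pattern; per the commit message, fc, softmax, and pooling receive the same treatment. Each entry point (forwarding, calcDerivative, calcGradient) compares the activation data type against the weight data type, calls a shared *_internal routine directly when they match, and otherwise clones the weights into the activation type first. A condensed sketch, taken from Conv2DLayer::forwarding in the diff below with the variable setup elided:

  // Mixed-type dispatch, condensed from Conv2DLayer::forwarding below.
  // When activations and weights already share a data type, run directly;
  // otherwise clone the weights into the activation type (e.g. FP32
  // master weights cloned to FP16 to match FP16 activations).
  const auto &in_type = input_.getDataType();
  if (in_type == filter_kernel.getDataType()) {
    forwarding_internal(input_, hidden_, filter_kernel, bias_kernel,
                        filter_size, padding, stride, dilation, enable_bias);
  } else {
    Tensor filter_kernel_ = filter_kernel.clone(in_type);
    Tensor bias_kernel_ = bias_kernel.clone(in_type);
    forwarding_internal(input_, hidden_, filter_kernel_, bias_kernel_,
                        filter_size, padding, stride, dilation, enable_bias);
  }

The gradient path mirrors this: calcGradient computes into cloned tensors and copies the results back via delK.copyData(delK_) and delBias.copyData(delBias_), so the stored gradients keep the weight data type.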
161 changes: 117 additions & 44 deletions nntrainer/layers/conv2d_layer.cpp
@@ -118,10 +118,16 @@ static void col2im(const Tensor &col_matrix, const TensorDim &kdim,
   if (image.getDataType() == nntrainer::Tdatatype::FP32) {
     float val;
     apply_data(&val);
-  } else if (image.getDataType() == nntrainer::Tdatatype::FP16) {
+  }
+#ifdef ENABLE_FP16
+  else if (image.getDataType() == nntrainer::Tdatatype::FP16) {
     _FP16 val;
     apply_data(&val);
   }
+#endif
+  else {
+    throw std::runtime_error("Not supported datatype");
+  }
 }
 
 /**
@@ -256,10 +262,16 @@ static void im2col(const Tensor &in, const TensorDim &kdim,
   if (out.getDataType() == nntrainer::Tdatatype::FP32) {
     float *out_data = out.getData<float>();
     apply_data(out_data);
-  } else if (out.getDataType() == nntrainer::Tdatatype::FP16) {
+  }
+#ifdef ENABLE_FP16
+  else if (out.getDataType() == nntrainer::Tdatatype::FP16) {
     _FP16 *out_data = out.getData<_FP16>();
     apply_data(out_data);
   }
+#endif
+  else {
+    throw std::runtime_error("Not supported datatype");
+  }
 }
 
 } // namespace
@@ -300,10 +312,11 @@ void Conv2DLayer::finalize(InitLayerContext &context) {
   auto &dilation =
     std::get<std::array<props::Dilation, CONV2D_DIM>>(conv_props);
 
-  TensorDim kernel_dim =
-    TensorDim(filter_size, in_dim.channel(), kernel_size[0], kernel_size[1],
-              in_dim.getTensorType());
-  TensorDim bias_dim = TensorDim(1, filter_size, 1, 1, in_dim.getTensorType());
+  auto in_t_type = in_dim.getTensorType();
+  in_t_type.data_type = context.getWeightDataType();
+  TensorDim kernel_dim = TensorDim(filter_size, in_dim.channel(),
+                                   kernel_size[0], kernel_size[1], in_t_type);
+  TensorDim bias_dim = TensorDim(1, filter_size, 1, 1, in_t_type);
 
   padding = std::get<props::Padding2D>(conv_props)
               .compute(in_dim, kernel_dim, {stride[0], stride[1]},
@@ -347,19 +360,11 @@ void Conv2DLayer::finalize(InitLayerContext &context) {
     << "Failed to initialize: Calculated patch end is over int max";
 }
 
-void Conv2DLayer::forwarding(RunLayerContext &context, bool training) {
-  int status = ML_ERROR_NONE;
-
-  unsigned int filter_size = std::get<props::FilterSize>(conv_props);
-  auto &stride = std::get<std::array<props::Stride, CONV2D_DIM>>(conv_props);
-  auto &dilation =
-    std::get<std::array<props::Dilation, CONV2D_DIM>>(conv_props);
-
-  Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
-  Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
-
-  Tensor &filter_kernel = context.getWeight(wt_idx[ConvParams::weight]);
-
+static void forwarding_internal(
+  Tensor &input, Tensor &hidden, Tensor &filter_kernel, Tensor &bias_kernel,
+  unsigned int filter_size, const std::array<unsigned int, 4> &padding,
+  const std::array<props::Stride, CONV2D_DIM> &stride,
+  const std::array<props::Dilation, CONV2D_DIM> &dilation, bool enable_bias) {
   /** Calculate Convolution 2D
    *
    * This is the 2D Matrix Shape [ height ] x [ width ]
@@ -396,8 +401,8 @@ void Conv2DLayer::forwarding(RunLayerContext &context, bool training) {
    * -> [Channel ( = filter_size = output_dim.channel )]
    *    x [output_dim.height x output_dim.width]
    */
-  const TensorDim &in_dim = input_.getDim();
-  const TensorDim &out_dim = hidden_.getDim();
+  const TensorDim &in_dim = input.getDim();
+  const TensorDim &out_dim = hidden.getDim();
   const TensorDim &filter_dim = filter_kernel.getDim();
   TensorDim filter_dim_squeezed{filter_kernel.batch(),
                                 filter_kernel.getDim().getFeatureLen()};
@@ -413,9 +418,9 @@ void Conv2DLayer::forwarding(RunLayerContext &context, bool training) {
     Tensor result = Tensor(calcCol2ImOutputDim(out_dim, filter_dim));
     result.setZero();
     for (unsigned int b = s; b < e; ++b) {
-      Tensor out = hidden_.getBatchSlice(b, 1);
+      Tensor out = hidden.getBatchSlice(b, 1);
       out.reshape({filter_size, out_dim.width() * out_dim.height()});
-      Tensor in_sub = input_.getBatchSlice(b, 1);
+      Tensor in_sub = input.getBatchSlice(b, 1);
 
       im2col(in_sub, filter_dim, padding, stride, dilation, result);
       filter_kernel.dot(result, out, false, true);
@@ -432,26 +437,48 @@ }
   }
 
   filter_kernel.reshape(filter_dim);
-  if (auto &disable_bias = std::get<props::DisableBias>(*layer_impl_props);
-      disable_bias.empty() || disable_bias.get() == false) {
-    Tensor &bias_kernel = context.getWeight(wt_idx[ConvParams::bias]);
-    status = hidden_.add_i(bias_kernel);
+  if (enable_bias) {
+    auto status = hidden.add_i(bias_kernel);
     if (status != ML_ERROR_NONE) {
       throw std::invalid_argument("[Conv2D] adding bias failed");
     }
   }
 }
 
-void Conv2DLayer::calcDerivative(RunLayerContext &context) {
+void Conv2DLayer::forwarding(RunLayerContext &context, bool training) {
+  int status = ML_ERROR_NONE;
+
   unsigned int filter_size = std::get<props::FilterSize>(conv_props);
   auto &stride = std::get<std::array<props::Stride, CONV2D_DIM>>(conv_props);
   auto &dilation =
     std::get<std::array<props::Dilation, CONV2D_DIM>>(conv_props);
 
-  const Tensor &derivative = context.getIncomingDerivative(SINGLE_INOUT_IDX);
-  Tensor &input_derivative = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
+  Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
+  Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
 
   Tensor &filter_kernel = context.getWeight(wt_idx[ConvParams::weight]);
+  Tensor &bias_kernel = context.getWeight(wt_idx[ConvParams::bias]);
+
+  auto &disable_bias = std::get<props::DisableBias>(*layer_impl_props);
+  bool enable_bias = !disable_bias.empty() && disable_bias.get() == false;
+
+  const auto &in_type = input_.getDataType();
+  if (in_type == filter_kernel.getDataType()) {
+    forwarding_internal(input_, hidden_, filter_kernel, bias_kernel,
+                        filter_size, padding, stride, dilation, enable_bias);
+  } else {
+    Tensor filter_kernel_ = filter_kernel.clone(in_type);
+    Tensor bias_kernel_ = bias_kernel.clone(in_type);
+    forwarding_internal(input_, hidden_, filter_kernel_, bias_kernel_,
+                        filter_size, padding, stride, dilation, enable_bias);
+  }
+}
+
+static void calcDerivative_internal(
+  const Tensor &derivative, Tensor &input_derivative, Tensor &filter_kernel,
+  unsigned int filter_size, const std::array<unsigned int, 4> &padding,
+  const std::array<props::Stride, CONV2D_DIM> &stride,
+  const std::array<props::Dilation, CONV2D_DIM> &dilation) {
   TensorDim filter_dim = filter_kernel.getDim();
   TensorDim filter_dim_squeezed{filter_kernel.batch(),
                                 filter_kernel.getDim().getFeatureLen()};
@@ -489,16 +516,36 @@ void Conv2DLayer::calcDerivative(RunLayerContext &context) {
   filter_kernel.reshape(filter_dim);
 }
 
-void Conv2DLayer::calcGradient(RunLayerContext &context) {
+void Conv2DLayer::calcDerivative(RunLayerContext &context) {
   unsigned int filter_size = std::get<props::FilterSize>(conv_props);
   auto &stride = std::get<std::array<props::Stride, CONV2D_DIM>>(conv_props);
   auto &dilation =
     std::get<std::array<props::Dilation, CONV2D_DIM>>(conv_props);
 
   const Tensor &derivative = context.getIncomingDerivative(SINGLE_INOUT_IDX);
-  Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
+  Tensor &input_derivative = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
+  Tensor &filter_kernel = context.getWeight(wt_idx[ConvParams::weight]);
 
-  Tensor &delK = context.getWeightGrad(wt_idx[ConvParams::weight]);
+  const auto &deriv_type = derivative.getDataType();
+  if (deriv_type == filter_kernel.getDataType()) {
+    // filter_kernel = filter_kernel_.clone(input_.getDataType());
+    calcDerivative_internal(derivative, input_derivative, filter_kernel,
+                            filter_size, padding, stride, dilation);
+
+  } else {
+    // filter_kernel = filter_kernel_;
+    Tensor filter_kernel_ = filter_kernel.clone(deriv_type);
+    calcDerivative_internal(derivative, input_derivative, filter_kernel_,
+                            filter_size, padding, stride, dilation);
+  }
+}
+
+void calcGradient_internal(
+  Tensor &input, Tensor &delK, Tensor &delBias, const Tensor &derivative,
+
+  unsigned int filter_size, const std::array<unsigned int, 4> &padding,
+  const std::array<props::Stride, CONV2D_DIM> &stride,
+  const std::array<props::Dilation, CONV2D_DIM> &dilation, bool enable_bias) {
   delK.setZero();
 
   TensorDim filter_dim = delK.getDim();
@@ -514,14 +561,14 @@
 
   TensorDim out_dim_squeezed{filter_size,
                              derivative.width() * derivative.height(),
-                             input_.getTensorType()};
-  auto workers = ParallelBatch(input_.batch());
+                             input.getTensorType()};
+  auto workers = ParallelBatch(input.batch());
   /// input -(im2col)-> column_matrix -> filter x (column_matrix) = output
   /// so delK = dy x column_matrix ^ T;
   if (workers.getNumWorkers() > 1) {
 
     TensorDim delK_ext = filter_dim_squeezed;
-    delK_ext.batch(input_.batch());
+    delK_ext.batch(input.batch());
 
     Tensor delK_par = Tensor(delK_ext);
     delK_par.setZero();
@@ -536,7 +583,7 @@
       Tensor delK_sub = delK_par.getBatchSlice(b, 1);
       deriv_sub.reshape(out_dim_squeezed);
 
-      Tensor in_sub = input_.getBatchSlice(b, 1);
+      Tensor in_sub = input.getBatchSlice(b, 1);
 
       /**
        * @todo this result can be cached from the forward iteration at the
@@ -553,21 +600,20 @@
 
     workers.run();
 
-    for (unsigned int b = 0; b < input_.batch(); ++b) {
+    for (unsigned int b = 0; b < input.batch(); ++b) {
       Tensor delK_sub = delK_par.getBatchSlice(b, 1);
       delK.add_i(delK_sub);
     }
 
   } else {
     Tensor result =
       Tensor(calcCol2ImOutputDim(derivative.getDim(), filter_dim));
     result.setZero();
 
-    for (unsigned int b = 0; b < input_.batch(); ++b) {
+    for (unsigned int b = 0; b < input.batch(); ++b) {
       Tensor deriv_sub = derivative.getBatchSlice(b, 1);
       deriv_sub.reshape(out_dim_squeezed);
 
-      Tensor in_sub = input_.getBatchSlice(b, 1);
+      Tensor in_sub = input.getBatchSlice(b, 1);
 
       /**
        * @todo this result can be cached from the forward iteration at the
@@ -580,13 +626,40 @@
     result.deallocate();
   }
   delK.reshape(filter_dim);
-  if (auto &disable_bias = std::get<props::DisableBias>(*layer_impl_props);
-      disable_bias.empty() || disable_bias.get() == false) {
-    Tensor &delBias = context.getWeightGrad(wt_idx[ConvParams::bias]);
+  if (enable_bias) {
    derivative.sum({0, 2, 3}, delBias);
   }
 }
 
+void Conv2DLayer::calcGradient(RunLayerContext &context) {
+  unsigned int filter_size = std::get<props::FilterSize>(conv_props);
+  auto &stride = std::get<std::array<props::Stride, CONV2D_DIM>>(conv_props);
+  auto &dilation =
+    std::get<std::array<props::Dilation, CONV2D_DIM>>(conv_props);
+
+  const Tensor &derivative = context.getIncomingDerivative(SINGLE_INOUT_IDX);
+  Tensor &input = context.getInput(SINGLE_INOUT_IDX);
+
+  Tensor &delK = context.getWeightGrad(wt_idx[ConvParams::weight]);
+  Tensor &delBias = context.getWeightGrad(wt_idx[ConvParams::bias]);
+
+  auto &disable_bias = std::get<props::DisableBias>(*layer_impl_props);
+  bool enable_bias = !disable_bias.empty() && disable_bias.get() == false;
+
+  const auto &in_type = input.getDataType();
+  if (in_type == delK.getDataType()) {
+    calcGradient_internal(input, delK, delBias, derivative, filter_size,
+                          padding, stride, dilation, enable_bias);
+  } else {
+    Tensor delK_ = delK.clone(in_type);
+    Tensor delBias_ = delBias.clone(in_type);
+    calcGradient_internal(input, delK_, delBias_, derivative, filter_size,
+                          padding, stride, dilation, enable_bias);
+    delK.copyData(delK_);
+    delBias.copyData(delBias_);
+  }
+}
+
 void Conv2DLayer::exportTo(Exporter &exporter,
                            const ml::train::ExportMethods &method) const {
   LayerImpl::exportTo(exporter, method);