[Layer] Modify Layer for mixed type
The fc, conv2d, softmax, and pooling layers are modified for mixed type.
In/Out tensors are supported as 32- or 16-bit float, and so are the weights.

Signed-off-by: Jiho Chu <[email protected]>
jihochu committed Feb 21, 2024
1 parent 135c6de commit 2ff98ba
Showing 6 changed files with 577 additions and 232 deletions.
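
The conv2d diff below illustrates the dispatch pattern; per the commit message, fc, softmax, and pooling receive the same treatment. Each entry point (forwarding, calcDerivative, calcGradient) compares the activation data type against the weight data type, calls a shared *_internal routine directly when they match, and otherwise clones the weights into the activation type first. A condensed sketch, taken from Conv2DLayer::forwarding in the diff below with the variable setup elided:

  // Mixed-type dispatch, condensed from Conv2DLayer::forwarding below.
  // When activations and weights already share a data type, run directly;
  // otherwise clone the weights into the activation type (e.g. FP32
  // master weights cloned to FP16 to match FP16 activations).
  const auto &in_type = input_.getDataType();
  if (in_type == filter_kernel.getDataType()) {
    forwarding_internal(input_, hidden_, filter_kernel, bias_kernel,
                        filter_size, padding, stride, dilation, enable_bias);
  } else {
    Tensor filter_kernel_ = filter_kernel.clone(in_type);
    Tensor bias_kernel_ = bias_kernel.clone(in_type);
    forwarding_internal(input_, hidden_, filter_kernel_, bias_kernel_,
                        filter_size, padding, stride, dilation, enable_bias);
  }

The gradient path mirrors this: calcGradient computes into cloned tensors and copies the results back via delK.copyData(delK_) and delBias.copyData(delBias_), so the stored gradients keep the weight data type.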
161 changes: 117 additions & 44 deletions nntrainer/layers/conv2d_layer.cpp
@@ -118,10 +118,16 @@ static void col2im(const Tensor &col_matrix, const TensorDim &kdim,
   if (image.getDataType() == nntrainer::Tdatatype::FP32) {
     float val;
     apply_data(&val);
-  } else if (image.getDataType() == nntrainer::Tdatatype::FP16) {
+  }
+#ifdef ENABLE_FP16
+  else if (image.getDataType() == nntrainer::Tdatatype::FP16) {
     _FP16 val;
     apply_data(&val);
   }
+#endif
+  else {
+    throw std::runtime_error("Not supported datatype");
+  }
 }
 
 /**
@@ -256,10 +262,16 @@ static void im2col(const Tensor &in, const TensorDim &kdim,
   if (out.getDataType() == nntrainer::Tdatatype::FP32) {
     float *out_data = out.getData<float>();
     apply_data(out_data);
-  } else if (out.getDataType() == nntrainer::Tdatatype::FP16) {
+  }
+#ifdef ENABLE_FP16
+  else if (out.getDataType() == nntrainer::Tdatatype::FP16) {
     _FP16 *out_data = out.getData<_FP16>();
     apply_data(out_data);
   }
+#endif
+  else {
+    throw std::runtime_error("Not supported datatype");
+  }
 }
 
 } // namespace
@@ -300,10 +312,11 @@ void Conv2DLayer::finalize(InitLayerContext &context) {
   auto &dilation =
     std::get<std::array<props::Dilation, CONV2D_DIM>>(conv_props);
 
-  TensorDim kernel_dim =
-    TensorDim(filter_size, in_dim.channel(), kernel_size[0], kernel_size[1],
-              in_dim.getTensorType());
-  TensorDim bias_dim = TensorDim(1, filter_size, 1, 1, in_dim.getTensorType());
+  auto in_t_type = in_dim.getTensorType();
+  in_t_type.data_type = context.getWeightDataType();
+  TensorDim kernel_dim = TensorDim(filter_size, in_dim.channel(),
+                                   kernel_size[0], kernel_size[1], in_t_type);
+  TensorDim bias_dim = TensorDim(1, filter_size, 1, 1, in_t_type);
 
   padding = std::get<props::Padding2D>(conv_props)
               .compute(in_dim, kernel_dim, {stride[0], stride[1]},
@@ -347,19 +360,11 @@ void Conv2DLayer::finalize(InitLayerContext &context) {
     << "Failed to initialize: Calculated patch end is over int max";
 }
 
-void Conv2DLayer::forwarding(RunLayerContext &context, bool training) {
-  int status = ML_ERROR_NONE;
-
-  unsigned int filter_size = std::get<props::FilterSize>(conv_props);
-  auto &stride = std::get<std::array<props::Stride, CONV2D_DIM>>(conv_props);
-  auto &dilation =
-    std::get<std::array<props::Dilation, CONV2D_DIM>>(conv_props);
-
-  Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
-  Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
-
-  Tensor &filter_kernel = context.getWeight(wt_idx[ConvParams::weight]);
-
+static void forwarding_internal(
+  Tensor &input, Tensor &hidden, Tensor &filter_kernel, Tensor &bias_kernel,
+  unsigned int filter_size, const std::array<unsigned int, 4> &padding,
+  const std::array<props::Stride, CONV2D_DIM> &stride,
+  const std::array<props::Dilation, CONV2D_DIM> &dilation, bool enable_bias) {
   /** Calculate Convolution 2D
    *
    * This is the 2D Matrix Shape [ height ] x [ width ]
@@ -396,8 +401,8 @@ void Conv2DLayer::forwarding(RunLayerContext &context, bool training) {
    * -> [Channel ( = filter_size = output_dim.channel )]
    *    x [output_dim.height x output_dim.width]
    */
-  const TensorDim &in_dim = input_.getDim();
-  const TensorDim &out_dim = hidden_.getDim();
+  const TensorDim &in_dim = input.getDim();
+  const TensorDim &out_dim = hidden.getDim();
   const TensorDim &filter_dim = filter_kernel.getDim();
   TensorDim filter_dim_squeezed{filter_kernel.batch(),
                                 filter_kernel.getDim().getFeatureLen()};
@@ -413,9 +418,9 @@ void Conv2DLayer::forwarding(RunLayerContext &context, bool training) {
     Tensor result = Tensor(calcCol2ImOutputDim(out_dim, filter_dim));
     result.setZero();
     for (unsigned int b = s; b < e; ++b) {
-      Tensor out = hidden_.getBatchSlice(b, 1);
+      Tensor out = hidden.getBatchSlice(b, 1);
       out.reshape({filter_size, out_dim.width() * out_dim.height()});
-      Tensor in_sub = input_.getBatchSlice(b, 1);
+      Tensor in_sub = input.getBatchSlice(b, 1);
 
       im2col(in_sub, filter_dim, padding, stride, dilation, result);
       filter_kernel.dot(result, out, false, true);
@@ -432,26 +437,48 @@ }
   }
 
   filter_kernel.reshape(filter_dim);
-  if (auto &disable_bias = std::get<props::DisableBias>(*layer_impl_props);
-      disable_bias.empty() || disable_bias.get() == false) {
-    Tensor &bias_kernel = context.getWeight(wt_idx[ConvParams::bias]);
-    status = hidden_.add_i(bias_kernel);
+  if (enable_bias) {
+    auto status = hidden.add_i(bias_kernel);
     if (status != ML_ERROR_NONE) {
       throw std::invalid_argument("[Conv2D] adding bias failed");
     }
   }
 }
 
-void Conv2DLayer::calcDerivative(RunLayerContext &context) {
+void Conv2DLayer::forwarding(RunLayerContext &context, bool training) {
+  int status = ML_ERROR_NONE;
+
   unsigned int filter_size = std::get<props::FilterSize>(conv_props);
   auto &stride = std::get<std::array<props::Stride, CONV2D_DIM>>(conv_props);
   auto &dilation =
     std::get<std::array<props::Dilation, CONV2D_DIM>>(conv_props);
 
-  const Tensor &derivative = context.getIncomingDerivative(SINGLE_INOUT_IDX);
-  Tensor &input_derivative = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
+  Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
+  Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
 
   Tensor &filter_kernel = context.getWeight(wt_idx[ConvParams::weight]);
+  Tensor &bias_kernel = context.getWeight(wt_idx[ConvParams::bias]);
+
+  auto &disable_bias = std::get<props::DisableBias>(*layer_impl_props);
+  bool enable_bias = !disable_bias.empty() && disable_bias.get() == false;
+
+  const auto &in_type = input_.getDataType();
+  if (in_type == filter_kernel.getDataType()) {
+    forwarding_internal(input_, hidden_, filter_kernel, bias_kernel,
+                        filter_size, padding, stride, dilation, enable_bias);
+  } else {
+    Tensor filter_kernel_ = filter_kernel.clone(in_type);
+    Tensor bias_kernel_ = bias_kernel.clone(in_type);
+    forwarding_internal(input_, hidden_, filter_kernel_, bias_kernel_,
+                        filter_size, padding, stride, dilation, enable_bias);
+  }
+}
+
+static void calcDerivative_internal(
+  const Tensor &derivative, Tensor &input_derivative, Tensor &filter_kernel,
+  unsigned int filter_size, const std::array<unsigned int, 4> &padding,
+  const std::array<props::Stride, CONV2D_DIM> &stride,
+  const std::array<props::Dilation, CONV2D_DIM> &dilation) {
   TensorDim filter_dim = filter_kernel.getDim();
   TensorDim filter_dim_squeezed{filter_kernel.batch(),
                                 filter_kernel.getDim().getFeatureLen()};
@@ -489,16 +516,36 @@ void Conv2DLayer::calcDerivative(RunLayerContext &context) {
   filter_kernel.reshape(filter_dim);
 }
 
-void Conv2DLayer::calcGradient(RunLayerContext &context) {
+void Conv2DLayer::calcDerivative(RunLayerContext &context) {
   unsigned int filter_size = std::get<props::FilterSize>(conv_props);
   auto &stride = std::get<std::array<props::Stride, CONV2D_DIM>>(conv_props);
   auto &dilation =
     std::get<std::array<props::Dilation, CONV2D_DIM>>(conv_props);
 
   const Tensor &derivative = context.getIncomingDerivative(SINGLE_INOUT_IDX);
-  Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
+  Tensor &input_derivative = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
+  Tensor &filter_kernel = context.getWeight(wt_idx[ConvParams::weight]);
 
-  Tensor &delK = context.getWeightGrad(wt_idx[ConvParams::weight]);
+  const auto &deriv_type = derivative.getDataType();
+  if (deriv_type == filter_kernel.getDataType()) {
+    // filter_kernel = filter_kernel_.clone(input_.getDataType());
+    calcDerivative_internal(derivative, input_derivative, filter_kernel,
+                            filter_size, padding, stride, dilation);
+
+  } else {
+    // filter_kernel = filter_kernel_;
+    Tensor filter_kernel_ = filter_kernel.clone(deriv_type);
+    calcDerivative_internal(derivative, input_derivative, filter_kernel_,
+                            filter_size, padding, stride, dilation);
+  }
+}
+
+void calcGradient_internal(
+  Tensor &input, Tensor &delK, Tensor &delBias, const Tensor &derivative,
+
+  unsigned int filter_size, const std::array<unsigned int, 4> &padding,
+  const std::array<props::Stride, CONV2D_DIM> &stride,
+  const std::array<props::Dilation, CONV2D_DIM> &dilation, bool enable_bias) {
   delK.setZero();
 
   TensorDim filter_dim = delK.getDim();
@@ -514,14 +561,14 @@
 
   TensorDim out_dim_squeezed{filter_size,
                              derivative.width() * derivative.height(),
-                             input_.getTensorType()};
-  auto workers = ParallelBatch(input_.batch());
+                             input.getTensorType()};
+  auto workers = ParallelBatch(input.batch());
   /// input -(im2col)-> column_matrix -> filter x (column_matrix) = output
   /// so delK = dy x column_matrix ^ T;
   if (workers.getNumWorkers() > 1) {
 
     TensorDim delK_ext = filter_dim_squeezed;
-    delK_ext.batch(input_.batch());
+    delK_ext.batch(input.batch());
 
     Tensor delK_par = Tensor(delK_ext);
     delK_par.setZero();
@@ -536,7 +583,7 @@
       Tensor delK_sub = delK_par.getBatchSlice(b, 1);
       deriv_sub.reshape(out_dim_squeezed);
 
-      Tensor in_sub = input_.getBatchSlice(b, 1);
+      Tensor in_sub = input.getBatchSlice(b, 1);
 
       /**
        * @todo this result can be cached from the forward iteration at the
@@ -553,21 +600,20 @@
 
     workers.run();
 
-    for (unsigned int b = 0; b < input_.batch(); ++b) {
+    for (unsigned int b = 0; b < input.batch(); ++b) {
       Tensor delK_sub = delK_par.getBatchSlice(b, 1);
       delK.add_i(delK_sub);
     }
 
   } else {
     Tensor result =
       Tensor(calcCol2ImOutputDim(derivative.getDim(), filter_dim));
     result.setZero();
 
-    for (unsigned int b = 0; b < input_.batch(); ++b) {
+    for (unsigned int b = 0; b < input.batch(); ++b) {
       Tensor deriv_sub = derivative.getBatchSlice(b, 1);
       deriv_sub.reshape(out_dim_squeezed);
 
-      Tensor in_sub = input_.getBatchSlice(b, 1);
+      Tensor in_sub = input.getBatchSlice(b, 1);
 
       /**
        * @todo this result can be cached from the forward iteration at the
@@ -580,13 +626,40 @@
     result.deallocate();
   }
   delK.reshape(filter_dim);
-  if (auto &disable_bias = std::get<props::DisableBias>(*layer_impl_props);
-      disable_bias.empty() || disable_bias.get() == false) {
-    Tensor &delBias = context.getWeightGrad(wt_idx[ConvParams::bias]);
+  if (enable_bias) {
    derivative.sum({0, 2, 3}, delBias);
   }
 }
 
+void Conv2DLayer::calcGradient(RunLayerContext &context) {
+  unsigned int filter_size = std::get<props::FilterSize>(conv_props);
+  auto &stride = std::get<std::array<props::Stride, CONV2D_DIM>>(conv_props);
+  auto &dilation =
+    std::get<std::array<props::Dilation, CONV2D_DIM>>(conv_props);
+
+  const Tensor &derivative = context.getIncomingDerivative(SINGLE_INOUT_IDX);
+  Tensor &input = context.getInput(SINGLE_INOUT_IDX);
+
+  Tensor &delK = context.getWeightGrad(wt_idx[ConvParams::weight]);
+  Tensor &delBias = context.getWeightGrad(wt_idx[ConvParams::bias]);
+
+  auto &disable_bias = std::get<props::DisableBias>(*layer_impl_props);
+  bool enable_bias = !disable_bias.empty() && disable_bias.get() == false;
+
+  const auto &in_type = input.getDataType();
+  if (in_type == delK.getDataType()) {
+    calcGradient_internal(input, delK, delBias, derivative, filter_size,
+                          padding, stride, dilation, enable_bias);
+  } else {
+    Tensor delK_ = delK.clone(in_type);
+    Tensor delBias_ = delBias.clone(in_type);
+    calcGradient_internal(input, delK_, delBias_, derivative, filter_size,
+                          padding, stride, dilation, enable_bias);
+    delK.copyData(delK_);
+    delBias.copyData(delBias_);
+  }
+}
+
 void Conv2DLayer::exportTo(Exporter &exporter,
                            const ml::train::ExportMethods &method) const {
   LayerImpl::exportTo(exporter, method);