[Layer] Add update scale factor method
It adds a method for updating the loss scale factor. The scale factor needs to be
updated during the calcDerivative step when loss scaling is enabled.

Signed-off-by: Jiho Chu <[email protected]>
jihochu committed Feb 21, 2024
1 parent 2ff98ba commit 68dd18b
Showing 6 changed files with 88 additions and 24 deletions.
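
A note for context before the per-file diffs: the scale factor set through this new method is the loss-scaling factor used in mixed-precision training. The loss layer multiplies its outgoing derivative by the scale so that small FP16 gradients do not underflow during backpropagation, and the weight gradients are divided by the same factor again before the optimizer step. A minimal, self-contained sketch of that idea in plain C++ (not the nntrainer API; the loss_scale constant and the scale/unscale loops are illustrative only):

#include <cstdio>
#include <vector>

int main() {
  // Toy per-element gradients produced at the loss layer; in FP16 training
  // values this small can flush to zero.
  std::vector<float> grad = {1e-5f, 2e-5f, -3e-5f};

  const float loss_scale = 1024.0f;

  // 1) Scale the derivative at the loss layer (what applyLossScale does below).
  for (auto &g : grad)
    g *= loss_scale;

  // ... backpropagation proceeds on the scaled gradients ...

  // 2) Unscale before the optimizer applies the update.
  for (auto &g : grad)
    g /= loss_scale;

  for (auto g : grad)
    std::printf("%g\n", g);
  return 0;
}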
3 changes: 3 additions & 0 deletions nntrainer/layers/loss/cross_entropy_sigmoid_loss_layer.cpp
@@ -61,6 +61,9 @@ void CrossEntropySigmoidLossLayer::calcDerivative(RunLayerContext &context) {
Tensor &y = context.getInput(SINGLE_INOUT_IDX);

y.apply<float>(ActiFunc::sigmoid<float>, ret_derivative);

ret_derivative.subtract_i(y2);

applyLossScale(ret_derivative);
if (ret_derivative.divide_i(ret_derivative.size()) != ML_ERROR_NONE) {
throw std::runtime_error("[CrossEntropySigmoidLossLayer::calcDerivative] "
14 changes: 8 additions & 6 deletions nntrainer/layers/loss/cross_entropy_softmax_loss_layer.cpp
@@ -81,20 +81,22 @@ void CrossEntropySoftmaxLossLayer::calcDerivative(RunLayerContext &context) {
#endif
}

float loss_scale = std::get<props::LossScale>(loss_props).get();

/// @note y and ret_derivative can be the same tensor here, so this has to be
/// an out-of-place operation
// TODO: verify that y and ret_derivative are never the same, since the loss
// layer does not run in-place
ret.subtract(y2, ret_derivative);
if (ret.getDataType() != y2.getDataType()) {
ret.subtract(y2.clone(ret.getDataType()), ret_derivative);
} else {
ret.subtract(y2, ret_derivative);
}

applyLossScale(ret_derivative);

if (ret_derivative.divide_i(ret.batch()) != ML_ERROR_NONE) {
throw std::runtime_error("[CrossEntropySoftmaxLossLayer::calcDerivative] "
"Error when calculating loss");
}

if (loss_scale != 0.0f)
ret_derivative.multiply_i(loss_scale);
}

} // namespace nntrainer
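
The data-type guard added to calcDerivative above clones the label/derivative tensor to the prediction's data type before the out-of-place subtract, so FP16 activations can be combined with an FP32 label (the same pattern reappears in the MSE changes below). A small stand-in sketch of this promote-then-operate idea, using float and double in place of nntrainer's FP16/FP32 tensors (illustrative only, not the nntrainer API):

#include <cstdio>
#include <vector>

// Cast the label to the prediction's element type, then subtract element-wise.
std::vector<float> subtract_mixed(const std::vector<float> &pred,
                                  const std::vector<double> &label) {
  std::vector<float> out(pred.size());
  for (size_t i = 0; i < pred.size(); ++i)
    out[i] = pred[i] - static_cast<float>(label[i]);
  return out;
}

int main() {
  auto d = subtract_mixed({0.9f, 0.1f}, {1.0, 0.0});
  std::printf("%f %f\n", d[0], d[1]);
  return 0;
}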
2 changes: 1 addition & 1 deletion nntrainer/layers/loss/cross_entropy_softmax_loss_layer.h
@@ -37,7 +37,7 @@ class CrossEntropySoftmaxLossLayer : public LossLayer {
~CrossEntropySoftmaxLossLayer() = default;

/**
* @copydoc Layer::forwarding(RunLayerContext &context, bool training)
* @copydoc Layer::forwarding(RunLayerContext &context, bool training)
*/
void forwarding(RunLayerContext &context, bool training) override;

17 changes: 16 additions & 1 deletion nntrainer/layers/loss/loss_layer.h
@@ -52,11 +52,19 @@ class LossLayer : public Layer {
*/
virtual bool supportBackwarding() const override { return true; }

/**
* @brief Set loss scale factor
*/
virtual void setLossSacle(float scale) override { loss_scale = scale; }

private:
/**
* @copydoc Layer::requireLabel()
*/
bool requireLabel() const override { return true; }

float loss_scale = 0.0f; /**< loss scale factor; 0.0f means scaling disabled */

protected:
/**
* @brief update loss
@@ -65,10 +73,17 @@ */
*/
void updateLoss(RunLayerContext &context, const Tensor &l);

/**
* @brief apply loss scale
*/
void applyLossScale(Tensor &derivative) {
if (loss_scale != 0.0f)
derivative.multiply_i(loss_scale);
}

Tensor
l; /**< loss tensor to store intermediate value to calculate loss value */

std::tuple<props::LossScale> loss_props;
};

} // namespace nntrainer
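Note the convention encoded in applyLossScale above: a loss_scale of 0.0f is treated as "scaling disabled", so the multiply is simply skipped rather than zeroing the derivative. A minimal sketch of the same convention as a hypothetical free function, outside the nntrainer API:

#include <vector>

// Hypothetical helper mirroring LossLayer::applyLossScale: a scale of 0.0f
// means scaling is disabled, so the derivative is left untouched.
void apply_loss_scale(std::vector<float> &derivative, float loss_scale) {
  if (loss_scale == 0.0f)
    return;
  for (auto &v : derivative)
    v *= loss_scale;
}

int main() {
  std::vector<float> grad = {0.5f, -0.25f};
  apply_loss_scale(grad, 128.0f); // scaling enabled
  apply_loss_scale(grad, 0.0f);   // no-op: scaling disabled
  return 0;
}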
4 changes: 3 additions & 1 deletion nntrainer/layers/loss/meson.build
@@ -7,7 +7,9 @@ loss_layer_sources = [
'constant_derivative_loss_layer.cpp'
]

loss_layer_headers = []
loss_layer_headers = [
'loss_layer.h'
]

loss_layer_deps = []

72 changes: 57 additions & 15 deletions nntrainer/layers/loss/mse_loss_layer.cpp
@@ -11,6 +11,7 @@
*
*/

#include "tensor.h"
#include <layer_context.h>
#include <mse_loss_layer.h>

@@ -20,34 +21,75 @@ static constexpr size_t SINGLE_INOUT_IDX = 0;

void MSELossLayer::forwarding(RunLayerContext &context, bool training) {
Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
Tensor &y = context.getInput(SINGLE_INOUT_IDX);
Tensor &y_ = context.getInput(SINGLE_INOUT_IDX);

// hidden_ <- y2 - y;
if (context.isLabelAvailable(SINGLE_INOUT_IDX)) {
Tensor &y2 = context.getLabel(SINGLE_INOUT_IDX);
y2.subtract(y, hidden_);
auto out_type = hidden_.getDataType();
if (out_type != y_.getDataType()) {
Tensor y = y_.clone(out_type);
if (context.isLabelAvailable(SINGLE_INOUT_IDX)) {
Tensor &y2 = context.getLabel(SINGLE_INOUT_IDX);
y2.subtract(y, hidden_);

/** calculate sum of squares normalized by size */
float l2norm = hidden_.l2norm();
l2norm *= l2norm / hidden_.size();
/** calculate sum of squares normalized by size */
float l2norm = hidden_.l2norm();
l2norm *= l2norm / hidden_.size();

/** wrap in tensor for update loss */
Tensor l = Tensor(TensorDim(1, 1, 1, 1), &l2norm);
LossLayer::updateLoss(context, l);
}
/** wrap in tensor for update loss */
Tensor l = Tensor(TensorDim(1, 1, 1, 1), &l2norm);
LossLayer::updateLoss(context, l);
}
// fill the output
hidden_.fill(y);
} else {
if (context.isLabelAvailable(SINGLE_INOUT_IDX)) {
Tensor &y2 = context.getLabel(SINGLE_INOUT_IDX);
y2.subtract(y_, hidden_);

/** calculate sum of squares normalized by size */
float l2norm = hidden_.l2norm();
l2norm *= l2norm / hidden_.size();

// fill the output
hidden_.fill(y);
/** wrap in tensor for update loss */
Tensor l = Tensor(TensorDim(1, 1, 1, 1), &l2norm);
LossLayer::updateLoss(context, l);
}
// fill the output
hidden_.fill(y_);
}
}

void MSELossLayer::calcDerivative(RunLayerContext &context) {
Tensor &ret_derivative = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
const Tensor &y2 = context.getIncomingDerivative(SINGLE_INOUT_IDX);
Tensor &y = context.getInput(SINGLE_INOUT_IDX);

y.subtract(y2, ret_derivative);
const auto &in_type = y.getDataType();
if (in_type != y2.getDataType()) {
Tensor y2_ = y2.clone(in_type);
y.subtract(y2_, ret_derivative);
} else {
y.subtract(y2, ret_derivative);
}

applyLossScale(ret_derivative);

float divider = ((float)y.size()) / 2;
if (ret_derivative.divide_i(divider) != ML_ERROR_NONE) {

/* ret_derivative may underflow to zero when divided by a big divider in fp16
 * calculation, so the division is done in larger precision.
 */
int ret;
if (ret_derivative.getDataType() != ml::train::TensorDim::DataType::FP32) {
Tensor ret_derivative_ =
ret_derivative.clone(ml::train::TensorDim::DataType::FP32);
ret = ret_derivative_.divide_i(divider);
ret_derivative.copyData(ret_derivative_);
} else {
ret = ret_derivative.divide_i(divider);
}

if (ret != ML_ERROR_NONE) {
throw std::runtime_error(
"[MSELossLayer::calcDerivative] Error when calculating loss");
}
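For intuition on the FP32 fallback above: in IEEE binary16 the smallest positive normal value is about 6.1e-5 and the smallest subnormal about 6.0e-8, so an already-small derivative divided by a large divider (y.size() / 2 can easily be in the thousands) can round to zero if the division is performed in FP16. As a rough worked example, 1e-4 / 4096 ≈ 2.4e-8 lies below even the smallest FP16 subnormal and rounds to zero, while the same value is comfortably representable in FP32; hence the division is carried out on an FP32 clone and the result copied back.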
