[Wait for #2574] [ Context ] Add loss scale in Context & using mse loss #2580

Closed · wants to merge 7 commits
2 changes: 1 addition & 1 deletion Applications/KNN/jni/meson.build
@@ -15,4 +15,4 @@ e = executable('knn_sample',
install_dir: application_install_dir
)

test('app_knn', e, args: [nntr_app_resdir / 'KNN'])
test('app_knn', e, args: [nntr_app_resdir / 'KNN/'])
17 changes: 11 additions & 6 deletions meson.build
@@ -64,9 +64,19 @@ warning_c_flags = [
'-Wno-error=varargs'
]

arch = host_machine.cpu_family()

if get_option('enable-avx')
extra_defines += '-DUSE_AVX=1'
if get_option('platform') == 'tizen'
add_project_arguments(['-mavx2'], language: ['c','cpp'])
else
add_project_arguments(['-march=native'], language: ['c','cpp'])
endif
message('-march=native added for AVX hardware acceleration.')
endif

if get_option('enable-fp16')
arch = host_machine.cpu_family()
if get_option('platform') == 'android'
add_project_arguments('-mfp16-format=ieee', language: ['c', 'cpp'])
extra_defines += '-DENABLE_FP16=1'
@@ -105,11 +115,6 @@ if get_option('enable-fp16')
if cc.version().version_compare('>=12.1.0')
message ('Float16 for x86_64 enabled. Modern gcc-x64 generally supports float16 with _Float16.')
extra_defines += '-DENABLE_FP16=1'
if get_option('enable-avx')
extra_defines += '-DUSE_AVX=1'
add_project_arguments(['-march=native'], language: ['c','cpp'])
message('-march=native added for AVX hardware acceleration.')
endif
else
warning ('Float16 for x86_64 enabled. However, software emulation is applied for fp16, making it slower and inconsistent. Use GCC 12+ for FP16 support. This build will probably fail unless you bring a compiler that supports fp16 for x64.')
endif
14 changes: 9 additions & 5 deletions nntrainer/graph/network_graph.cpp
@@ -768,9 +768,10 @@ NetworkGraph::finalizeContext(const std::shared_ptr<LayerNode> &lnode,
* node is going to be used with in-place optimizations.
*/
auto out_specs = init_context.getOutSpecs();

/// @note try move inplace control to finalize
bool shared_var = false, shared_grad = false;
if (lnode->executeInPlace() != InPlace::NONE) {
if (lnode->executeInPlace() != InPlace::NONE && lnode->supportInPlace()) {
setInplaceSharedMemoryConfigByLayer(lnode, shared_var, shared_grad);
for (unsigned int i = 0; i < out_specs.size(); ++i) {
auto &s = out_specs.at(i);
@@ -879,7 +880,8 @@ NetworkGraph::finalizeContext(const std::shared_ptr<LayerNode> &lnode,
lnode->getTrainable(), shared_weight_names),
inputs, outputs,
tensor_manager->requestTensors(gnode, init_context.getTensorsSpec(),
lnode->getTrainable(), shared_tensor_names));
lnode->getTrainable(), shared_tensor_names),
init_context.getLossScale());

return outputs;
}
@@ -1027,7 +1029,8 @@ NetworkGraph::refinalizeContext(const std::shared_ptr<LayerNode> &lnode,
// TODO: update weights spec for trainable based on layer trainable prop
weights, inputs, outputs,
tensor_manager->requestTensors(gnode, init_context.getTensorsSpec(),
lnode->getTrainable(), shared_tensor_names));
lnode->getTrainable(), shared_tensor_names),
init_context.getLossScale());

return outputs;
}
@@ -1556,8 +1559,9 @@ void NetworkGraph::requestOptimizerVariable(
const TensorDim &dim = w->getDim();
std::vector<TensorDim> dims = cb(dim);
w->setOptimizerVariables(tensor_manager->requestWeightOptimizerVariables(
dims, w->getName(), TensorLifespan::MAX_LIFESPAN,
w->isGradientClipByGlobalNorm(), Tensor::Initializer::ZEROS));
dims, w->getName(), ":opt", TensorLifespan::MAX_LIFESPAN,
w->isGradientClipByGlobalNorm(), w->isMixedPrecision(),
Tensor::Initializer::ZEROS));
}
}
}
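To make the wiring above easier to follow, here is a minimal sketch of how the loss scale travels from layer finalization into the run context. The types and function below are simplified stand-ins, not the actual nntrainer interfaces:

```cpp
// Sketch only: simplified stand-ins for InitLayerContext / RunLayerContext and
// the configureRunContext() step; not the literal nntrainer code.
#include <iostream>

struct InitLayerContext {
  float loss_scale = 1.0f; // chosen when the node is finalized
  float getLossScale() const { return loss_scale; }
};

struct RunLayerContext {
  float loss_scale = 1.0f; // carried for the lifetime of the run context
  float getLossScale() const { return loss_scale; }
};

// Roughly what finalizeContext()/configureRunContext() now do: forward the
// scale instead of dropping it.
RunLayerContext configureRunContext(const InitLayerContext &init) {
  RunLayerContext run;
  run.loss_scale = init.getLossScale();
  return run;
}

int main() {
  InitLayerContext init;
  init.loss_scale = 128.0f; // e.g. a scale picked for fp16 training
  RunLayerContext run = configureRunContext(init);
  std::cout << run.getLossScale() << '\n'; // prints 128
}
```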
19 changes: 16 additions & 3 deletions nntrainer/layers/input_layer.cpp
@@ -33,8 +33,7 @@ namespace nntrainer {
static constexpr size_t SINGLE_INOUT_IDX = 0;

InputLayer::InputLayer() :
Layer(),
input_props(props::Normalization(), props::Standardization()) {}
Layer(), input_props(props::Normalization(), props::Standardization()) {}

void InputLayer::setProperty(const std::vector<std::string> &values) {
auto remain_props = loadProperties(values, input_props);
@@ -47,7 +46,7 @@ void InputLayer::forwarding(RunLayerContext &context, bool training) {
Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
if (!context.executeInPlace()) {
Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
hidden_.copy(input_);
hidden_.copyData(input_);
}

if (std::get<props::Normalization>(input_props))
@@ -70,7 +69,21 @@ void InputLayer::finalize(InitLayerContext &context) {

std::vector<TensorDim> output_dims = context.getInputDimensions();

for (auto &d : output_dims) {
d.setDataType(context.getActivationDataType());
}

context.setOutputDimensions(output_dims);

is_inplace = true;

/**
* @note Input Layer assumes that the input tensor is always FP32. Therefore,
* if the activation data type is not FP32, the layer does not support in-place
* operation.
*/
if (context.getActivationDataType() != ml::train::TensorDim::DataType::FP32)
is_inplace = false;
}

} /* namespace nntrainer */
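The in-place restriction added in finalize() follows directly from the data-type conversion: once the activation type is FP16, the output can no longer alias the FP32 input, so copyData() must write into a separate buffer. A rough illustration of that decision, using a simplified enum rather than ml::train::TensorDim::DataType:

```cpp
// Sketch only: the in-place decision InputLayer::finalize() makes, with a
// simplified DataType enum instead of the real TensorDim data types.
#include <iostream>

enum class DataType { FP32, FP16 };

// In-place execution means the output tensor aliases the input tensor.
// Converting an FP32 input into an FP16 activation needs a distinct
// destination buffer, so aliasing is only safe when the types match.
bool input_layer_can_run_in_place(DataType activation_type) {
  constexpr DataType input_type = DataType::FP32; // input is assumed to be FP32
  return activation_type == input_type;
}

int main() {
  std::cout << std::boolalpha
            << input_layer_can_run_in_place(DataType::FP32) << '\n'  // true
            << input_layer_can_run_in_place(DataType::FP16) << '\n'; // false
}
```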
3 changes: 2 additions & 1 deletion nntrainer/layers/input_layer.h
@@ -82,7 +82,7 @@ class InputLayer : public Layer {
/**
* @copydoc Layer::supportInPlace()
*/
bool supportInPlace() const override { return true; }
bool supportInPlace() const override { return is_inplace; }

/**
* @copydoc Layer::exportTo(Exporter &exporter, ml::train::ExportMethods
@@ -105,6 +105,7 @@

private:
std::tuple<props::Normalization, props::Standardization> input_props;
bool is_inplace;
};
} // namespace nntrainer

16 changes: 15 additions & 1 deletion nntrainer/layers/layer_context.cpp
@@ -126,13 +126,14 @@ const std::vector<VarGradSpecV2> &InitLayerContext::getOutSpecs() const {
}

RunLayerContext::RunLayerContext(const std::string &name, bool trainable,
float l, bool in_place_,
float l, bool in_place_, float loss_scale_,
const std::vector<Weight *> &w,
const std::vector<Var_Grad *> &in,
const std::vector<Var_Grad *> &out,
const std::vector<Var_Grad *> &t) :
loss(l),
in_place(in_place_),
loss_scale(loss_scale_),
weights(w),
inputs(in),
outputs(out),
@@ -169,6 +170,19 @@ Tensor &RunLayerContext::getWeightGrad(unsigned int idx) const {
return weights[idx]->getGradientRef();
}

/**
* @brief Get the FP32 copy of the weight tensor object
*
* @param idx Identifier of the weight
* @return Tensor& Reference to the FP32 weight tensor
*/
Tensor &RunLayerContext::getWeightFP32(unsigned int idx) const {
if (!weights[idx]->hasGradient())
throw std::invalid_argument(
"Requesting gradient for a non-trainable weight.");
return weights[idx]->getVariableFP32Ref();
}

/**
* @brief Get the Weight Optimizer Variable tensor object
*
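getWeightFP32() exposes the FP32 master copy that mixed-precision training keeps alongside a low-precision weight. For context, a self-contained sketch of the usual update pattern such an accessor enables; plain vectors and a toy quantizer stand in for Tensor, and this is not the nntrainer optimizer code:

```cpp
// Sketch only: the classic "fp16 weights with fp32 master copy" update that a
// getWeightFP32()-style accessor supports. std::vector stands in for Tensor.
#include <cstdint>
#include <iostream>
#include <vector>

// Toy low-precision storage: values snapped to a coarse grid for illustration.
float to_low_precision(float v) {
  return static_cast<float>(static_cast<int32_t>(v * 1024.0f)) / 1024.0f;
}

void sgd_step_mixed(std::vector<float> &weight_lp,         // working copy used in forward/backward
                    std::vector<float> &weight_fp32,       // FP32 master copy (what getWeightFP32 returns)
                    const std::vector<float> &grad_scaled, // gradient of the scaled loss
                    float loss_scale, float lr) {
  for (size_t i = 0; i < weight_fp32.size(); ++i) {
    float grad = grad_scaled[i] / loss_scale;        // unscale before the update
    weight_fp32[i] -= lr * grad;                     // accumulate in full precision
    weight_lp[i] = to_low_precision(weight_fp32[i]); // refresh the working copy
  }
}

int main() {
  std::vector<float> w_fp32{0.5f}, w_lp{0.5f};
  std::vector<float> g{12.8f}; // computed with loss_scale = 128, so the true grad is 0.1
  sgd_step_mixed(w_lp, w_fp32, g, /*loss_scale=*/128.0f, /*lr=*/0.01f);
  std::cout << w_fp32[0] << ' ' << w_lp[0] << '\n'; // ~0.499 and its low-precision snap
}
```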
45 changes: 41 additions & 4 deletions nntrainer/layers/layer_context.h
@@ -63,7 +63,7 @@ class InitLayerContext {
const float max_norm = 0.0,
std::array<std::string, 3> tensor_type_ = {"NCHW", "FP32",
"FP32"},
const float loss_scale = 0.0);
const float loss_scale = 1.0);
/**
* @brief get Tensor Format of Layer
*
@@ -348,6 +348,14 @@
*/
bool executeInPlace() const { return in_place; }

/**
* @brief get the initial loss scale. This value is passed on to RunLayerContext
* and may be updated there during training.
*
* @return loss_scale
*/
float getLossScale() const { return loss_scale; }

private:
std::vector<TensorDim> input_dim; /**< Input dimensions for the layer */
bool in_place; /**< if the layer is expected to run in-place */
@@ -385,7 +393,7 @@ class RunLayerContext {
* @brief Construct a new Run Layer Context object
*
*/
RunLayerContext() : loss(0.0), in_place(false) {}
RunLayerContext() : loss(0.0), in_place(false), loss_scale(1.0) {}

/**
* @brief Construct a new Run Layer Context object
@@ -396,20 +404,33 @@
std::get<props::Name>(props).set(name);
}

/**
* @brief Construct a new Run Layer Context object
*
*/
RunLayerContext(const std::string &name, bool in_place_, float loss_scale_) :
RunLayerContext() {
in_place = in_place_;
std::get<props::Name>(props).set(name);
loss_scale = loss_scale_;
}

/**
* @brief Construct a new Run Layer Context object
*
* @param name name of the layer
* @param trainable if the layer is trainable
* @param l loss of the layer
* @param in_place_ execution in-place of the layer
* @param loss_scale loss_scale of the layer
* @param w weights of the layer
* @param in inputs of the layer
* @param out outputs of the layer
* @param t extra tensors of the layer
*/
RunLayerContext(const std::string &name, bool trainable, float l,
bool in_place_, const std::vector<Weight *> &w,
bool in_place_, float loss_scale_,
const std::vector<Weight *> &w,
const std::vector<Var_Grad *> &in,
const std::vector<Var_Grad *> &out,
const std::vector<Var_Grad *> &t);
@@ -463,6 +484,15 @@ class RunLayerContext {
Tensor &getWeightGrad(unsigned int idx) const;

/**
* @brief Get the FP32 copy of the weight tensor object
*
* @param idx Identifier of the weight
* @return Tensor& Reference to the FP32 weight tensor
*/
Tensor &getWeightFP32(unsigned int idx) const;

/**
* @brief Get the Weight Optimizer Variable tensor object
*
* @param idx Identifier of the weight
@@ -874,10 +904,17 @@
*/
ml::train::LayerComputeEngine getComputeEngine() { return compute_engine; }

/**
* @brief get loss scale
* @return loss scale
*/
float getLossScale() { return loss_scale; }

private:
std::tuple<props::Name, props::Trainable> props; /**< props of the layer */
float loss; /**< loss of the layer */
bool in_place; /**< if the layer is expected to run in-place */
float loss_scale; /**< loss_scale of the layer */

std::vector<Weight *> weights; /**< weights of the layer */
std::vector<Var_Grad *> inputs; /**< inputs of the layer */
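Note that the default loss scale changes from 0.0 to 1.0 in the contexts above. The scale is applied multiplicatively to the loss derivative, so 1.0 is the neutral value; a default of 0.0 would silently zero every gradient. A tiny sketch of that invariant:

```cpp
// Sketch only: why 1.0 (not 0.0) is the safe default for a multiplicative
// loss scale. A scale of 0.0 would wipe out the derivative entirely.
#include <cassert>

float apply_loss_scale(float derivative, float loss_scale) {
  return derivative * loss_scale; // essentially what LossLayer::applyLossScale does element-wise
}

int main() {
  assert(apply_loss_scale(0.25f, 1.0f) == 0.25f);   // neutral default: derivative unchanged
  assert(apply_loss_scale(0.25f, 0.0f) == 0.0f);    // old default would kill all gradients
  assert(apply_loss_scale(0.25f, 128.0f) == 32.0f); // typical fp16-style scaling
  return 0;
}
```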
9 changes: 5 additions & 4 deletions nntrainer/layers/layer_node.cpp
@@ -599,7 +599,7 @@ InitLayerContext LayerNode::finalize(const std::vector<TensorDim> &input_dims,

const auto &scope = getSharedFrom().empty() ? getName() : getSharedFrom();
float max_norm = 0.0;
float loss_scale = 0.0;
float loss_scale = 1.0;
if (!std::get<props::ClipGradByGlobalNorm>(*layer_node_props).empty())
max_norm = std::get<props::ClipGradByGlobalNorm>(*layer_node_props).get();

@@ -864,10 +864,11 @@ float LayerNode::getLoss() const { return *loss; }
void LayerNode::configureRunContext(const std::vector<Weight *> &weights,
const std::vector<Var_Grad *> &inputs,
const std::vector<Var_Grad *> &outputs,
const std::vector<Var_Grad *> &tensors) {
const std::vector<Var_Grad *> &tensors,
float loss_scale) {
run_context = std::make_unique<RunLayerContext>(
getName(), getTrainable(), 0.0f, executeInPlace() != InPlace::NONE, weights,
inputs, outputs, tensors);
getName(), getTrainable(), 0.0f, executeInPlace() != InPlace::NONE,
loss_scale, weights, inputs, outputs, tensors);
}

/**
12 changes: 7 additions & 5 deletions nntrainer/layers/layer_node.h
@@ -487,6 +487,7 @@ class LayerNode final : public ml::train::Layer, public GraphNode {
const std::vector<TensorDim> getOutputDimensions() const;
/**
* @brief Get the Weight object
* currently, only unit tests use this function.
*
* @param idx Identifier of the weight
* @return Weight& Reference to the weight
@@ -495,11 +496,11 @@ class LayerNode final : public ml::train::Layer, public GraphNode {
NNTR_THROW_IF(!run_context, std::runtime_error)
<< __func__ << " layer needs to be finalized first!";
if (run_context->weightHasGradient(idx)) {
return Weight(run_context->getWeight(idx),
run_context->getWeightGrad(idx),
run_context->getWeightName(idx));
return Weight(
run_context->getWeight(idx), run_context->getWeightGrad(idx),
run_context->getWeightFP32(idx), run_context->getWeightName(idx));
} else {
return Weight(run_context->getWeight(idx), Tensor(),
return Weight(run_context->getWeight(idx), Tensor(), Tensor(),
run_context->getWeightName(idx));
}
}
@@ -819,7 +820,8 @@ class LayerNode final : public ml::train::Layer, public GraphNode {
void configureRunContext(const std::vector<Weight *> &weights,
const std::vector<Var_Grad *> &inputs,
const std::vector<Var_Grad *> &outputs,
const std::vector<Var_Grad *> &tensors);
const std::vector<Var_Grad *> &tensors,
float loss_scale);

/**
* @brief Preset modes for printing summary for the layer
9 changes: 8 additions & 1 deletion nntrainer/layers/loss/loss_layer.cpp
@@ -22,7 +22,7 @@ void LossLayer::finalize(InitLayerContext &context) {
d.setDataType(
str_converter<enum_class_prop_tag,
nntrainer::TensorDataTypeInfo>::from_string("FP32"));

context.setOutputDimensions(output_dim);
}

@@ -36,6 +36,13 @@ void LossLayer::updateLoss(RunLayerContext &context, const Tensor &l) {
context.setLoss(loss_sum / (float)l.batch());
}

void LossLayer::applyLossScale(RunLayerContext &context, Tensor &ret_deriv) {

float loss_scale = context.getLossScale();
if (loss_scale != 1.0)
ret_deriv.multiply_i(loss_scale);
}

/**
* @copydoc Layer::setProperty(const std::vector<std::string> &values)
*/
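The PR title also mentions using this with the MSE loss; that part of the change is not shown in this excerpt, but the intended call pattern is presumably along these lines. The sketch below uses free functions and std::vector instead of the actual MSE loss layer and Tensor API:

```cpp
// Sketch only: how a concrete loss layer would fold applyLossScale() into its
// backward pass. Not the actual nntrainer MSE implementation.
#include <iostream>
#include <vector>

// Derivative of 0.5 * mean((y - t)^2) with respect to y is (y - t) / N.
std::vector<float> mse_derivative(const std::vector<float> &y,
                                  const std::vector<float> &t) {
  std::vector<float> d(y.size());
  for (size_t i = 0; i < y.size(); ++i)
    d[i] = (y[i] - t[i]) / static_cast<float>(y.size());
  return d;
}

// Counterpart of LossLayer::applyLossScale(): scale the returned derivative.
void apply_loss_scale(std::vector<float> &deriv, float loss_scale) {
  if (loss_scale != 1.0f)
    for (float &v : deriv)
      v *= loss_scale;
}

int main() {
  std::vector<float> y{0.9f, 0.1f}, t{1.0f, 0.0f};
  auto d = mse_derivative(y, t);
  apply_loss_scale(d, /*loss_scale=*/128.0f); // value read from the run context
  std::cout << d[0] << ' ' << d[1] << '\n';   // about -6.4 and 6.4 (scaled; unscaled again at the optimizer)
}
```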
7 changes: 7 additions & 0 deletions nntrainer/layers/loss/loss_layer.h
@@ -60,6 +60,13 @@ class LossLayer : public Layer {
*/
void updateLoss(RunLayerContext &context, const Tensor &l);

/**
* @brief apply the loss scale to the returned derivative
* @param context run context holding the loss scale
* @param l return derivative tensor to scale
*/
void applyLossScale(RunLayerContext &context, Tensor &l);

Tensor
l; /**< loss tensor to store intermediate value to calculate loss value */
};