Skip to content

Commit

Permalink
Implement AdamW
Browse files Browse the repository at this point in the history
This commit implements the AdamW algorithm in the form of an `Optimizer`.
Some (potential) further tasks include:
- Merging the implementations of Adam and AdamW, so that `AdamW` inherits from `Adam`
- Adding more unit tests

Signed-off-by: Daniel Jang <[email protected]>
  • Loading branch information
jangdan committed Nov 21, 2024
1 parent 3ccea3d commit 9865949
Show file tree
Hide file tree
Showing 8 changed files with 193 additions and 2 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ NNTrainer Provides
|:-------:|:---:|:---:|
| sgd | Stochastic Gradient Descent | - |
| adam | Adaptive Moment Estimation | - |
| adamw | Adam with decoupled weight decay regularization | - |

| Keyword | Learning Rate | Description |
|:-------:|:---:|:---:|
Expand Down
9 changes: 9 additions & 0 deletions api/ccapi/include/optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class LearningRateScheduler;
*/
enum OptimizerType {
ADAM = ML_TRAIN_OPTIMIZER_TYPE_ADAM, /**< Adam optimizer */
ADAMW = ML_TRAIN_OPTIMIZER_TYPE_ADAMW, /**< AdamW optimizer */
SGD = ML_TRAIN_OPTIMIZER_TYPE_SGD, /**< SGD optimizer */
UNKNOWN = ML_TRAIN_OPTIMIZER_TYPE_UNKNOWN /**< unknown optimizer type */
};
Expand Down Expand Up @@ -135,6 +136,14 @@ SGD(const std::vector<std::string> &properties = {}) {
return createOptimizer(OptimizerType::SGD, properties);
}

/**
* @brief Helper function to create AdamW Optimizer
* @param[in] properties property strings forwarded unchanged to
* createOptimizer(); defaults to an empty list
* @return the std::unique_ptr<Optimizer> returned by createOptimizer() for
* OptimizerType::ADAMW
*/
inline std::unique_ptr<Optimizer>
AdamW(const std::vector<std::string> &properties = {}) {
return createOptimizer(OptimizerType::ADAMW, properties);
}

} // namespace optimizer

/**
Expand Down
3 changes: 2 additions & 1 deletion api/nntrainer-api-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@ typedef enum {
* @since_tizen 6.0
*/
typedef enum {
ML_TRAIN_OPTIMIZER_TYPE_ADAM = 0, /**< Adam Optimizer */
ML_TRAIN_OPTIMIZER_TYPE_ADAM = 0, /**< Adam Optimizer */
ML_TRAIN_OPTIMIZER_TYPE_ADAMW = 2, /**< AdamW Optimizer */
ML_TRAIN_OPTIMIZER_TYPE_SGD = 1, /**< Stochastic Gradient Descent Optimizer */
ML_TRAIN_OPTIMIZER_TYPE_UNKNOWN = 999 /**< Unknown Optimizer */
} ml_train_optimizer_type_e;
Expand Down
3 changes: 3 additions & 0 deletions nntrainer/app_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <util_func.h>

#include <adam.h>
#include <adamw.h>
#include <sgd.h>

#include <activation_layer.h>
Expand Down Expand Up @@ -235,6 +236,8 @@ static void add_default_object(AppContext &ac) {
ac.registerFactory(nntrainer::createOptimizer<SGD>, SGD::type, OptType::SGD);
ac.registerFactory(nntrainer::createOptimizer<Adam>, Adam::type,
OptType::ADAM);
ac.registerFactory(nntrainer::createOptimizer<AdamW>, AdamW::type,
OptType::ADAMW);
ac.registerFactory(AppContext::unknownFactory<nntrainer::Optimizer>,
"unknown", OptType::UNKNOWN);

Expand Down
89 changes: 89 additions & 0 deletions nntrainer/optimizers/adamw.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (C) 2024 Daniel Jang <[email protected]>
*
* @file adamw.cpp
* @date 3 November 2024
* @see https://github.com/nnstreamer/nntrainer
* @author Jijoong Moon <[email protected]>
* @author Parichay Kapoor <[email protected]>
* @author Daniel Jang <[email protected]>
* @bug No known bugs except for NYI items
* @brief This is the AdamW Optimizer.
*/

#include <cmath>
#include <fstream>

#include <adamw.h>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
#include <node_exporter.h>
#include <util_func.h>

namespace nntrainer {

/**
 * @brief Construct an AdamW optimizer and seed its properties with defaults.
 */
AdamW::AdamW() : adam_props(PropsB1(), PropsB2(), PropsEpsilon(), TorchRef()) {
  // Each property is addressed by its type inside the tuple.
  std::get<PropsB1>(adam_props).set(0.9f);
  std::get<PropsB2>(adam_props).set(0.999f);
  std::get<PropsEpsilon>(adam_props).set(1.0e-7f);
  std::get<TorchRef>(adam_props).set(false);
}

/**
 * @brief Destroy the AdamW object; there is nothing to release explicitly.
 */
AdamW::~AdamW() = default;

/**
 * @brief Indices of the optimizer variables requested through
 * getOptimizerVariableDim() and fetched with
 * RunOptimizerContext::getOptimizerVariable(): wm is the running average of
 * the gradient, wv the running average of the squared gradient.
 */
enum AdamParams { wm, wv };

/**
 * @brief Report the dimensions of the optimizer variables needed per weight.
 * @param dim dimension to use for every optimizer variable
 * @return two copies of @a dim, one per AdamParams entry (wm, wv)
 */
std::vector<TensorDim> AdamW::getOptimizerVariableDim(const TensorDim &dim) {
  return std::vector<TensorDim>(2, dim);
}

/**
 * @brief Export this optimizer's properties.
 * @param exporter exporter that receives the properties
 * @param method export method handed through to the exporter
 */
void AdamW::exportTo(Exporter &exporter,
const ml::train::ExportMethods &method) const {
// Save the AdamW-specific tuple first, then let the base class export its own.
exporter.saveResult(adam_props, method, this);
Optimizer::exportTo(exporter, method);
}

/**
 * @brief Set optimizer properties from a list of property strings.
 * @param values property strings to apply
 */
void AdamW::setProperty(const std::vector<std::string> &values) {
  // loadProperties() fills adam_props from `values`; whatever it returns
  // (presumably the entries it did not consume) goes to the base class.
  Optimizer::setProperty(loadProperties(values, adam_props));
}

/**
 * @brief Apply one optimizer step to the weight held by @a context.
 *
 * Updates the running averages of the gradient (wm) and of the squared
 * gradient (wv), overwrites the gradient tensor with the bias-corrected step
 *   (wm / biasCorrection1) / (sqrt(wv / biasCorrection2) + epsilon)
 * and asks the context to apply it with the current learning rate.
 *
 * Only the gradient tensor is used as scratch space; wm and wv keep exactly
 * the moving-average values so the next iteration starts from valid state.
 *
 * @param context per-weight context supplying the gradient, the optimizer
 * variables, the iteration count and the learning rate
 */
void AdamW::applyGradient(RunOptimizerContext &context) {
  Tensor &x_grad = context.getGradient();

  // NOTE(review): the TorchRef property is part of adam_props but is not
  // consulted by this step.
  auto &beta1 = std::get<PropsB1>(adam_props).get();
  auto &beta2 = std::get<PropsB2>(adam_props).get();
  auto &epsilon = std::get<PropsEpsilon>(adam_props).get();

  // The exponent is iteration + 1 so the correction is non-zero on the first
  // call (iteration == 0).
  unsigned int iteration = context.getIteration();
  float biasCorrection1 = 1 - pow(beta1, iteration + 1);
  float biasCorrection2 = 1 - pow(beta2, iteration + 1);
  Tensor &wm = context.getOptimizerVariable(AdamParams::wm);
  Tensor &wv = context.getOptimizerVariable(AdamParams::wv);

  // wm <- beta1 * wm + (1 - beta1) * g
  wm.multiply_i(beta1);
  wm.add_i(x_grad, 1.0f - beta1);

  // wv <- beta2 * wv + (1 - beta2) * g * g
  wv.multiply_i(beta2);
  wv.add_i(x_grad.multiply(x_grad), 1.0f - beta2);

  // 1 / (sqrt(v_hat) + eps) with v_hat = wv / biasCorrection2. The result is
  // written into x_grad so that wv itself is never modified here.
  std::function<double(double)> invDenom = [epsilon,
                                            biasCorrection2](double v) {
    return 1 / (sqrtDouble(v / biasCorrection2) + epsilon);
  };
  wv.apply<float>(invDenom, x_grad);

  // Multiply by m_hat = wm / biasCorrection1 to obtain the final step.
  x_grad.divide_i(biasCorrection1);
  x_grad.multiply_i(wm);

  // NOTE(review): no weight-decay coefficient is read in this function
  // (adam_props is constructed with beta1, beta2, epsilon and torch_ref only),
  // so any decay term has to be applied elsewhere - confirm.
  context.applyGradient(context.getLearningRate());
}

} // namespace nntrainer
86 changes: 86 additions & 0 deletions nntrainer/optimizers/adamw.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (C) 2024 Daniel Jang <[email protected]>
*
* @file adamw.h
* @date 3 November 2024
* @see https://github.com/nnstreamer/nntrainer
* @author Jijoong Moon <[email protected]>
* @author Parichay Kapoor <[email protected]>
* @author Daniel Jang <[email protected]>
* @bug No known bugs except for NYI items
* @brief This is the AdamW Optimizer.
*/
#ifndef __ADAMW_H__
#define __ADAMW_H__
#ifdef __cplusplus

#include <tuple>

#include <adam.h>

#include <base_properties.h>
#include <optimizer_devel.h>

namespace nntrainer {

/**
* @class AdamW Optimizer class
* @brief AdamW Optimizer, implemented as a direct subclass of Optimizer with
* its own property tuple
*/
class AdamW : public Optimizer {
public:
/**
* @brief Construct a new AdamW object
*
*/
AdamW();

/**
* @brief Destroy the AdamW object
*
*/
~AdamW();

/**
* @copydoc Optimizer::getDefaultLearningRate()
*
* @return 0.001
*/
double getDefaultLearningRate() const override { return 0.001; }

/**
* @copydoc Optimizer::applyGradient(RunOptimizerContext &context)
*/
void applyGradient(RunOptimizerContext &context) override;

/**
* @copydoc Optimizer::getType()
*/
const std::string getType() const override { return AdamW::type; }

/**
* @copydoc Optimizer::getOptimizerVariableDim(const TensorDim &dim)
*/
std::vector<TensorDim> getOptimizerVariableDim(const TensorDim &dim) override;

/**
* @copydoc Optimizer::exportTo(Exporter &exporter, const
* ml::train::ExportMethods& method)
*/
void exportTo(Exporter &exporter,
const ml::train::ExportMethods &method) const override;

/** @brief Type name of this optimizer, as returned by getType() */
inline static const std::string type = "adamw";

/**
* @copydoc Optimizer::setProperty(const std::vector<std::string> &values)
*/
void setProperty(const std::vector<std::string> &values) override;

private:
/** @brief AdamW properties: beta1, beta2, epsilon and the torch_ref flag */
std::tuple<PropsB1, PropsB2, PropsEpsilon, TorchRef> adam_props;
};
} /* namespace nntrainer */

#endif /* __cplusplus */
#endif /* __ADAMW_H__ */
3 changes: 2 additions & 1 deletion nntrainer/optimizers/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ optimizer_sources = [
'lr_scheduler_exponential.cpp',
'lr_scheduler_linear.cpp',
'lr_scheduler_step.cpp',
'optimizer_wrapped.cpp'
'optimizer_wrapped.cpp',
'adamw.cpp',
]

optimizer_headers = [
Expand Down
1 change: 1 addition & 0 deletions test/ccapi/unittest_ccapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ TEST(ccapi_optimizer, construct_01_n) {
/**
 * @brief Positive test: each optimizer helper can be called with its default
 * arguments without throwing.
 */
TEST(ccapi_optimizer, construct_02_p) {
EXPECT_NO_THROW(ml::train::optimizer::Adam());
EXPECT_NO_THROW(ml::train::optimizer::SGD());
EXPECT_NO_THROW(ml::train::optimizer::AdamW());
}

/**
Expand Down

0 comments on commit 9865949

Please sign in to comment.