Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ FSU ] Enables Asynchronous FSU for forwarding #2813

Merged
merged 2 commits into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions Applications/SimpleFC/jni/Android.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Android NDK build script for the SimpleFC async-FSU test application.
LOCAL_PATH := $(call my-dir)
include $(CLEAR_VARS)

# ndk path
ifndef ANDROID_NDK
$(error ANDROID_NDK is not defined!)
endif

# Root of the prebuilt nntrainer tree (headers plus per-ABI shared libraries)
NNTRAINER_ROOT := ../nntrainer/

NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/include/

# Prebuilt core nntrainer shared library module
LOCAL_MODULE := nntrainer
LOCAL_SRC_FILES := $(NNTRAINER_ROOT)/lib/$(TARGET_ARCH_ABI)/libnntrainer.so
LOCAL_EXPORT_C_INCLUDES := $(NNTRAINER_INCLUDES)

include $(PREBUILT_SHARED_LIBRARY)

include $(CLEAR_VARS)

# Prebuilt C++ API (ccapi) shared library module
LOCAL_MODULE := ccapi-nntrainer
LOCAL_SRC_FILES := $(NNTRAINER_ROOT)/lib/$(TARGET_ARCH_ABI)/libccapi-nntrainer.so
LOCAL_EXPORT_C_INCLUDES := $(NNTRAINER_INCLUDES) $(NNTRAINER_INCLUDES)/nntrainer

include $(PREBUILT_SHARED_LIBRARY)

include $(CLEAR_VARS)

# Shared CIFAR data-loader sources reused by this test application
CIFARDIR = ../../utils/datagen/cifar

# Build the test executable itself (C++17, NEON, OpenMP statically linked)
LOCAL_ARM_NEON := true
LOCAL_CFLAGS += -std=c++17 -Ofast -mcpu=cortex-a53 -Ilz4-nougat/lib
LOCAL_LDFLAGS += -Llz4-nougat/lib/obj/local/$(TARGET_ARCH_ABI)/
LOCAL_CXXFLAGS += -std=c++17 -frtti
LOCAL_CFLAGS += -pthread -fexceptions -fopenmp -static-openmp
LOCAL_LDFLAGS += -fexceptions
LOCAL_MODULE_TAGS := optional
LOCAL_ARM_MODE := arm
LOCAL_MODULE := nntrainer_simplefc
LOCAL_LDLIBS := -llog -landroid -fopenmp -static-openmp

LOCAL_SRC_FILES := main.cpp $(CIFARDIR)/cifar_dataloader.cpp

LOCAL_SHARED_LIBRARIES := nntrainer ccapi-nntrainer

LOCAL_C_INCLUDES += $(NNTRAINER_INCLUDES) $(CIFARDIR)

include $(BUILD_EXECUTABLE)
3 changes: 3 additions & 0 deletions Applications/SimpleFC/jni/Application.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# NDK application settings: 64-bit ARM only, shared C++ STL, Android API 29.
APP_ABI := arm64-v8a
APP_STL := c++_shared
APP_PLATFORM := android-29
270 changes: 270 additions & 0 deletions Applications/SimpleFC/jni/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,270 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (C) 2024 Jijoong Moon <[email protected]>
*
* @file main.cpp
* @date 10 Dec 2024
* @brief Test Application for Asynchronous FSU
* @see https://github.com/nnstreamer/nntrainer
* @author Jijoong Moon <[email protected]>
* @bug No known bugs except for NYI items
*/
#include <array>
#include <chrono>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

#if defined(ENABLE_TEST)
#include <gtest/gtest.h>
#endif

#include <layer.h>
#include <model.h>
#include <optimizer.h>

#include <cifar_dataloader.h>

#ifdef PROFILE
#include <profiler.h>
#endif

// Convenience aliases for the ml::train C++ API handle types.
using LayerHandle = std::shared_ptr<ml::train::Layer>;
using ModelHandle = std::unique_ptr<ml::train::Model>;

// Owning handle for the CIFAR/random data loaders used by the callbacks below.
using UserDataType = std::unique_ptr<nntrainer::util::DataLoader>;

/** cache loss values post training for test */
float training_loss = 0.0;
float validation_loss = 0.0;

/**
* @brief make "key=value" from key and value
*
* @tparam T type of a value
* @param key key
* @param value value
* @return std::string with "key=value"
*/
/**
 * @brief make "key=value" from key and value
 *
 * @tparam T type of a value (must be streamable)
 * @param key key
 * @param value value
 * @return std::string with "key=value"
 */
template <typename T>
static std::string withKey(const std::string &key, const T &value) {
  std::ostringstream prop;
  prop << key << '=' << value;
  return prop.str();
}

/**
 * @brief make "key=v1,v2,..." from key and a list of values
 *
 * @tparam T type of a value (must be streamable)
 * @param key key
 * @param value non-empty list of values, joined with ','
 * @return std::string with "key=v1,v2,..."
 * @throws std::invalid_argument when the list is empty
 */
template <typename T>
static std::string withKey(const std::string &key,
                           std::initializer_list<T> value) {
  if (value.size() == 0) {
    throw std::invalid_argument("empty data cannot be converted");
  }

  std::ostringstream prop;
  prop << key << '=';

  // join with ',' using a first-element flag instead of iterator arithmetic
  bool first = true;
  for (const auto &item : value) {
    if (!first) {
      prop << ',';
    }
    prop << item;
    first = false;
  }

  return prop.str();
}

/**
* @brief Create network
*
* @return vector of layers that contain full graph of asynch
*/
std::vector<LayerHandle> createGraph() {
using ml::train::createLayer;

std::vector<LayerHandle> layers;

layers.push_back(createLayer(
"input", {withKey("name", "input0"), withKey("input_shape", "1:1:320")}));

layers.push_back(createLayer("fully_connected",
{withKey("unit", 100),
withKey("weight_initializer", "xavier_uniform"),
withKey("bias_initializer", "zeros")}));

layers.push_back(createLayer("fully_connected",
{withKey("unit", 100),
withKey("weight_initializer", "xavier_uniform"),
withKey("bias_initializer", "zeros")}));

return layers;
}

/**
 * @brief Create a neural-net model with the test graph attached.
 *
 * @return ModelHandle owning the constructed model (MSE loss)
 */
ModelHandle create() {
  auto model = ml::train::createModel(ml::train::ModelType::NEURAL_NET,
                                      {withKey("loss", "mse")});

  auto graph = createGraph();
  for (auto &node : graph) {
    model->addLayer(node);
  }

  return model;
}

/**
 * @brief Training data generator callback passed to the GENERATOR dataset.
 *
 * @param input buffers to fill with the next input batch
 * @param label buffers to fill with the next label batch
 * @param last set to true when the dataset is exhausted
 * @param user_data opaque pointer to the owning DataLoader
 * @return int 0 on success
 */
int trainData_cb(float **input, float **label, bool *last, void *user_data) {
  // static_cast is the correct cast for converting a void* back to the
  // pointer type it originally was; reinterpret_cast is unnecessary here.
  auto data = static_cast<nntrainer::util::DataLoader *>(user_data);

  data->next(input, label, last);
  return 0;
}

/**
 * @brief Validation data generator callback passed to the GENERATOR dataset.
 *
 * @param input buffers to fill with the next input batch
 * @param label buffers to fill with the next label batch
 * @param last set to true when the dataset is exhausted
 * @param user_data opaque pointer to the owning DataLoader
 * @return int 0 on success
 */
int validData_cb(float **input, float **label, bool *last, void *user_data) {
  // static_cast is the correct cast for converting a void* back to the
  // pointer type it originally was; reinterpret_cast is unnecessary here.
  auto data = static_cast<nntrainer::util::DataLoader *>(user_data);

  data->next(input, label, last);
  return 0;
}

/**
 * @brief Compile, initialize and run one inference pass with async FSU.
 *
 * The model is configured with memory_swap (FSU) enabled, lookahead 1 and
 * FP16 weights/activations. Weights are saved to and reloaded from disk
 * first, since FSU streams weights from the file during forwarding.
 *
 * @param epochs forwarded as a model property (unused by inference)
 * @param batch_size forwarded as a model property
 * @param train_user_data training loader backing the GENERATOR dataset
 * @param valid_user_data validation loader backing the GENERATOR dataset
 * @throws std::invalid_argument when compilation or initialization fails
 */
void createAndRun(unsigned int epochs, unsigned int batch_size,
                  UserDataType &train_user_data,
                  UserDataType &valid_user_data) {

  // setup model: memory_swap=true enables FSU with a lookahead window of 1
  ModelHandle model = create();
  model->setProperty(
    {withKey("batch_size", batch_size), withKey("epochs", epochs),
     withKey("memory_swap", "true"), withKey("memory_swap_lookahead", "1"),
     withKey("model_tensor_type", "FP16-FP16")});

  auto optimizer = ml::train::createOptimizer("sgd", {"learning_rate=0.001"});
  model->setOptimizer(std::move(optimizer));

  int status = model->compile(ml::train::ExecutionMode::INFERENCE);
  if (status) {
    throw std::invalid_argument("model compilation failed!");
  }

  status = model->initialize(ml::train::ExecutionMode::INFERENCE);
  if (status) {
    throw std::invalid_argument("model initialization failed!");
  }

  auto dataset_train = ml::train::createDataset(
    ml::train::DatasetType::GENERATOR, trainData_cb, train_user_data.get());
  auto dataset_valid = ml::train::createDataset(
    ml::train::DatasetType::GENERATOR, validData_cb, valid_user_data.get());

  // to test asynch fsu, we do need save the model weight data in file
  model->save("simplefc_weight_fp16_fp16_100.bin",
              ml::train::ModelFormat::MODEL_FORMAT_BIN);
  model->load("./simplefc_weight_fp16_fp16_100.bin");

  model->summarize(std::cout, ML_TRAIN_SUMMARY_MODEL);

  // deterministic test input input[j] = j, matching the 1:1:320 input shape;
  // feature_size also sizes the buffer so the two can never drift apart
  constexpr unsigned int feature_size = 320;

  float input[feature_size];
  float label[1];

  for (unsigned int j = 0; j < feature_size; ++j)
    input[j] = j;

  std::vector<float *> in;
  std::vector<float *> l;
  std::vector<float *> answer;

  in.push_back(input);
  l.push_back(label);

  // single-sample inference through the FSU-enabled graph
  answer = model->inference(1, in, l);

  in.clear();
  l.clear();

  std::cout << "done" << std::endl;
}

std::array<UserDataType, 2>
createFakeDataGenerator(unsigned int batch_size,
unsigned int simulated_data_size,
unsigned int data_split) {
UserDataType train_data(new nntrainer::util::RandomDataLoader(
{{batch_size, 1, 1, 320}}, {{batch_size, 1, 1, 100}},
simulated_data_size / data_split));
UserDataType valid_data(new nntrainer::util::RandomDataLoader(
{{batch_size, 1, 1, 320}}, {{batch_size, 1, 1, 100}},
simulated_data_size / data_split));

return {std::move(train_data), std::move(valid_data)};
}

std::array<UserDataType, 2>
createRealDataGenerator(const std::string &directory, unsigned int batch_size,
unsigned int data_split) {

UserDataType train_data(new nntrainer::util::Cifar100DataLoader(
directory + "/train.bin", batch_size, data_split));
UserDataType valid_data(new nntrainer::util::Cifar100DataLoader(
directory + "/test.bin", batch_size, data_split));

return {std::move(train_data), std::move(valid_data)};
}

/**
 * @brief Entry point: build data loaders, then run FSU inference once.
 *
 * Usage: nntrainer_simplefc [data_dir batch_size data_split epoch]
 * Defaults are "fake" 1 1 1 when arguments are omitted. "fake" selects the
 * random data generator; any other value is treated as a CIFAR directory.
 * (The meson test invokes this binary with explicit arguments, so they must
 * actually be parsed rather than ignored.)
 */
int main(int argc, char *argv[]) {
  auto start = std::chrono::system_clock::now();
  std::time_t start_time = std::chrono::system_clock::to_time_t(start);
  std::cout << "started computation at " << std::ctime(&start_time)
            << std::endl;

#ifdef PROFILE
  auto listener =
    std::make_shared<nntrainer::profile::GenericProfileListener>();
  nntrainer::profile::Profiler::Global().subscribe(listener);
#endif

  std::string data_dir = "fake";
  unsigned int batch_size = 1;
  unsigned int data_split = 1;
  unsigned int epoch = 1;

  // optional CLI override: data_dir batch_size data_split epoch
  if (argc >= 5) {
    try {
      data_dir = argv[1];
      batch_size = static_cast<unsigned int>(std::stoul(argv[2]));
      data_split = static_cast<unsigned int>(std::stoul(argv[3]));
      epoch = static_cast<unsigned int>(std::stoul(argv[4]));
    } catch (const std::exception &e) {
      std::cerr << "invalid arguments, usage: " << argv[0]
                << " [data_dir batch_size data_split epoch]" << std::endl;
      return EXIT_FAILURE;
    }
  }

  std::array<UserDataType, 2> user_datas;

  try {
    if (data_dir == "fake") {
      user_datas = createFakeDataGenerator(batch_size, 512, data_split);
    } else {
      user_datas = createRealDataGenerator(data_dir, batch_size, data_split);
    }
  } catch (const std::exception &e) {
    std::cerr << "uncaught error while creating data generator! details: "
              << e.what() << std::endl;
    return EXIT_FAILURE;
  }

  auto &[train_user_data, valid_user_data] = user_datas;

  try {
    createAndRun(epoch, batch_size, train_user_data, valid_user_data);
  } catch (const std::exception &e) {
    std::cerr << "uncaught error while running! details: " << e.what()
              << std::endl;
    return EXIT_FAILURE;
  }
  auto end = std::chrono::system_clock::now();

  std::chrono::duration<double> elapsed_seconds = end - start;
  std::time_t end_time = std::chrono::system_clock::to_time_t(end);

  std::cout << "finished computation at " << std::ctime(&end_time)
            << "elapsed time: " << elapsed_seconds.count() << "s\n";

#ifdef PROFILE
  std::cout << *listener;
#endif

  return EXIT_SUCCESS;
}
28 changes: 28 additions & 0 deletions Applications/SimpleFC/jni/meson.build
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Sources for the SimpleFC async-FSU test app; reuses the shared CIFAR loader.
app_sources = [
  'main.cpp',
  cifar_path / 'cifar_dataloader.cpp'
]

app_dependencies = [app_utils_dep,
  iniparser_dep,
  nntrainer_dep,
  nntrainer_ccapi_dep
]

if get_option('enable-test')
  app_dependencies += [gtest_dep]
endif

e = executable('nntrainer_simplefc',
  app_sources,
  include_directories: [include_directories('.'), cifar_include_dir],
  dependencies: app_dependencies,
  install: get_option('install-app'),
  install_dir: application_install_dir
)

# Long-running smoke test; args are: data_dir batch_size data_split epoch.
if get_option('enable-long-test')
  testenv = environment()
  testenv.set('OPENBLAS_NUM_THREADS', '4')
  test('app_asynch_fsu', e, args: ['fake', '1', '128', '1'], env: testenv, timeout: 300)
endif
1 change: 1 addition & 0 deletions Applications/SimpleFC/nntrainer
2 changes: 2 additions & 0 deletions Applications/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,5 @@ if get_option('enable-tflite-backbone')
subdir('SimpleShot')
endif
subdir('PicoGPT/jni')

subdir('SimpleFC/jni')
Loading
Loading