[Application] fix some settings for LLaMA

The ENABLE_ENCODER2 option allowed a specific tokenizer file to be used on the server.

However, this option does not fit the typical user scenario, because the tokenizer differs between distributed LLaMA versions. Likewise, running without any tokenizer at all is not a common use case.

Therefore, the default behavior is now to always use the tokenizer at the path specified by the user.
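
For reference, here is a condensed sketch of the resulting flow, pulled from the diff below. The "../DIR" paths are placeholders that the user is expected to replace with their own vocab/merges/model files; this is illustrative, not a drop-in copy of main.cpp.

```cpp
#include <string>
#include <encoder.hpp> // GPT2Encoder

// Path configurations (placeholders; point these at your own files)
std::string VOCAB_JSON_FILE = "../DIR/vocab.json";
std::string MERGE_TXT_FILE = "../DIR/merges.txt";
std::string MODEL_PATH = "../DIR/model.bin";

void run(std::string text, bool apply_temperature) {
  // The tokenizer is now always loaded from the user-specified paths,
  // instead of only when ENABLE_ENCODER2 is defined.
  auto tokenizer = unwrap(GPT2Encoder::load(VOCAB_JSON_FILE, MERGE_TXT_FILE),
                          "Error initializing GPT2 tokenizer\n");
  // ... tokenize `text`, run the model, and decode the generated ids ...
}
```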

**Self evaluation:**
1. Build test:	 [X]Passed [ ]Failed [ ]Skipped
2. Run test:	 [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Seungbaek Hong <[email protected]>
baek2sm committed Jun 7, 2024
1 parent d50487a commit d119d01
Showing 1 changed file with 18 additions and 33 deletions: Applications/LLaMA/jni/main.cpp
@@ -30,19 +30,22 @@
#include <swiglu.h>
#include <transpose_layer.h>

#if defined(ENABLE_ENCODER2)
#include "json.hpp"
#include <codecvt>
#include <encoder.hpp>
#include <locale>
using json = nlohmann::json;
#endif

using LayerHandle = std::shared_ptr<ml::train::Layer>;
using ModelHandle = std::unique_ptr<ml::train::Model>;

ModelHandle g_model;

// Path configurations
std::string VOCAB_JSON_FILE = "../DIR/vocab.json";
std::string MERGE_TXT_FILE = "../DIR/merges.txt";
std::string MODEL_PATH = "../DIR/model.bin";

// Hyper params for LLaMA
int const DIM = 2304;
int const NUM_LAYERS = 28;
@@ -573,10 +576,11 @@ void run(std::string text, bool apply_temperature) {
unsigned int init_len;

#if defined(ENABLE_ENCODER2)
std::string vocab_file_name = "../Applications/LLaMA/jni/vocab.json";
std::string merge_file_name = "../Applications/LLaMA/jni/merges.txt";
VOCAB_JSON_FILE = "../Applications/LLaMA/jni/vocab.json";
MERGE_TXT_FILE = "../Applications/LLaMA/jni/merges.txt";
#endif

auto tokenizer = unwrap(GPT2Encoder::load(vocab_file_name, merge_file_name),
auto tokenizer = unwrap(GPT2Encoder::load(VOCAB_JSON_FILE, MERGE_TXT_FILE),
"Error initializising GPT2 tokenizer\n");

std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
@@ -592,15 +596,6 @@ void run(std::string text, bool apply_temperature) {

input.push_back(input_sample);

#else
float init_input[INIT_SEQ_LEN] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40,
50, 60, 70, 80, 90, 100, 200, 300, 400, 500, 600, 700, 800, 900};
memcpy(input_sample, init_input, sizeof(float) * INIT_SEQ_LEN);
input.push_back(input_sample);
init_len = 18;
#endif

std::vector<int64_t> token_ids;

auto output =
@@ -619,11 +614,9 @@
std::cout << " Progress Reading: 100 % " << std::endl;
std::cout << std::endl << "### Output : " << std::endl;
if (init_len < INIT_SEQ_LEN) {
#if defined(ENABLE_ENCODER2)
auto decoded_str = tokenizer.decode({static_cast<int64_t>(ids)});
std::cout << decoded_str << " ";
std::cout.flush();
#endif
}

for (unsigned int i = input_len + 1; i < input_len + NUM_TO_GENERATE; ++i) {
@@ -635,11 +628,9 @@
input_sample[0] = static_cast<float>(init_input[i]);
} else {
input_sample[0] = static_cast<float>(ids);
#if defined(ENABLE_ENCODER2)
auto decoded_str = tokenizer.decode({static_cast<int64_t>(ids)});
std::cout << decoded_str << " ";
std::cout.flush();
#endif
}

#ifdef ENABLE_FP16
@@ -659,12 +650,13 @@
void createAndRun(unsigned int epochs, unsigned int batch_size) {
// setup model
g_model = createLLaMA();
g_model->setProperty({withKey("batch_size", batch_size),
withKey("epochs", epochs),
// #ifdef ENABLE_FP16
withKey("model_tensor_type", "FP16-FP16"),
// #endif
withKey("save_path", "test_model.bin")});
g_model->setProperty({
withKey("batch_size", batch_size),
withKey("epochs", epochs),
#ifdef ENABLE_FP16
withKey("model_tensor_type", "FP16-FP16"),
#endif
});

auto optimizer = ml::train::createOptimizer("sgd", {"learning_rate=0.001"});
g_model->setOptimizer(std::move(optimizer));
@@ -679,12 +671,9 @@ void createAndRun(unsigned int epochs, unsigned int batch_size) {
throw std::invalid_argument("model initialization failed!");
}

std::string weight_path = "./llama_fp16.bin";

g_model->load(weight_path);
g_model->load(MODEL_PATH);
}

#if defined(ENABLE_ENCODER2)
std::wstring decodeUnicodeEscape(const std::wstring &input) {
std::wstringstream result;

@@ -707,21 +696,17 @@ std::wstring decodeUnicodeEscape(const std::wstring &input) {

return result.str();
}
#endif

int main(int argc, char *argv[]) {
// Setting locale
std::locale::global(std::locale("ko_KR.UTF-8"));

#if defined(ENABLE_ENCODER2)
// Getting arguments From terminal
std::wstring input;
std::getline(std::wcin, input);
std::wstring test = decodeUnicodeEscape(input);
std::wstring_convert<std::codecvt_utf16<wchar_t>> converter;
std::string text = converter.to_bytes(test);
#else
std::string text = "This is sample input for LLaMA.";
#endif

auto &app_context = nntrainer::AppContext::Global();
try {