add tune_test
huangzhengxiang committed Dec 4, 2024
1 parent 3612945 commit 0f425c4
Showing 2 changed files with 185 additions and 0 deletions.
3 changes: 3 additions & 0 deletions transformers/llm/engine/CMakeLists.txt
@@ -28,12 +28,15 @@ endif()
add_executable(llm_demo ${CMAKE_CURRENT_LIST_DIR}/app/llm_demo.cpp)
add_executable(ppl_demo ${CMAKE_CURRENT_LIST_DIR}/app/ppl_demo.cpp)
add_executable(embedding_demo ${CMAKE_CURRENT_LIST_DIR}/app/embedding_demo.cpp)
add_executable(tune_test ${CMAKE_CURRENT_LIST_DIR}/app/tune_test.cpp)
IF (NOT MNN_SEP_BUILD)
target_link_libraries(llm_demo ${MNN_DEPS})
target_link_libraries(ppl_demo ${MNN_DEPS})
target_link_libraries(embedding_demo ${MNN_DEPS})
target_link_libraries(tune_test ${MNN_DEPS})
ELSE ()
target_link_libraries(llm_demo ${MNN_DEPS} llm)
target_link_libraries(ppl_demo ${MNN_DEPS} llm)
target_link_libraries(embedding_demo ${MNN_DEPS} llm)
target_link_libraries(tune_test ${MNN_DEPS} llm)
ENDIF ()
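# tune_test builds alongside the other demo binaries; a typical invocation
# (paths are illustrative) would be: ./tune_test path/to/config.json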
182 changes: 182 additions & 0 deletions transformers/llm/engine/app/tune_test.cpp
@@ -0,0 +1,182 @@
//
// tune_test.cpp
// MNNTests
//
// Created by MNN on 2024/12/04.
// Copyright © 2018, Alibaba Group Holding Limited
//

#include <math.h>
#include <MNN/expr/Expr.hpp>
#include <MNN/expr/ExprCreator.hpp>
#include <MNN/expr/Optimizer.hpp>
#include "MNN_generated.h"
#define MNN_OPEN_TIME_TRACE
#include <MNN/AutoTime.hpp>

#include "llm/llm.hpp"
#include "evaluation/dataset.hpp"
#include <MNN/expr/ExecutorScope.hpp>
#include <fstream>
#include <sstream>
#include <stdlib.h>
#include <initializer_list>

using namespace MNN::Express;
using namespace MNN::Transformer;
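
// tune_test profiles candidate backends (CPU and OpenCL) by timing either
// stand-alone MatMul expressions or a real LLM prefill pass for several
// prompt lengths, so the measurements can guide prefill backend selection.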

static void trace_prepare(Llm* llm) {
MNN_PRINT("Prepare for resize opt Begin\n");
llm->trace(true);
std::ostringstream cacheOs;
llm->generate(std::initializer_list<int>{200, 200}, &cacheOs, "");
MNN_PRINT("Prepare for resize opt End\n");
llm->trace(false);
llm->reset();
}

static void fillFloat(float* dst, int h, int w, float offset = 0.0f) {
for (int y = 0; y < h; ++y) {
auto dstY = dst + w * y;
for (int x = 0; x < w; ++x) {
dstY[x] = ((float)x * 0.1f + (float)y + offset) / 10000.0f;
}
}
}

static MNNForwardType backend_type_convert(const std::string& type_str) {
if (type_str == "cpu") return MNN_FORWARD_CPU;
if (type_str == "metal") return MNN_FORWARD_METAL;
if (type_str == "cuda") return MNN_FORWARD_CUDA;
if (type_str == "opencl") return MNN_FORWARD_OPENCL;
if (type_str == "opengl") return MNN_FORWARD_OPENGL;
if (type_str == "vulkan") return MNN_FORWARD_VULKAN;
if (type_str == "npu") return MNN_FORWARD_NN;
return MNN_FORWARD_AUTO;
}
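
// profileMatMul: build a single [e, l] x [l, h] MatMul expression, force it to
// recompute `time` times by re-mapping the inputs, and return the average
// latency in ms (also prints an e*l*h-based throughput figure).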

float profileMatMul(int e, int h, int l) {
// Test MatMul
// prepare MatMul config
std::unique_ptr<MNN::OpT> op(new MNN::OpT);
op->type = MNN::OpType_MatMul;
op->main.type = MNN::OpParameter_MatMul;
op->main.value = new MNN::MatMulT;
auto matmulParam = op->main.AsMatMul();
matmulParam->transposeA = false;
matmulParam->transposeB = false;

// prepare input and output
auto x0 = _Input({}, NHWC, halide_type_of<float>());
auto x1 = _Input({}, NHWC, halide_type_of<float>());
x0->resize({e, l});
x1->resize({l, h});
auto y = Variable::create(Expr::create(op.get(), {x0, x1}));
Variable::prepareCompute({y});
fillFloat(x0->writeMap<float>(), e, l);
fillFloat(x1->writeMap<float>(), l, h);

// Test for 5 times
const auto time = 5;
MNN::Timer _t;
for (int t = 0; t < time; ++t) {
x0->writeMap<float>();
x1->writeMap<float>();
y->readMap<float>();
}
float timeCost = _t.durationInUs() / 1000.0f / (float)time;
float flops = (float)e * (float)l * (float)h / timeCost / 1000.0f / 1000.0f;
MNN_PRINT("[%d, %d, %d], Avg time: %f ms , flops: %f G\n", e, l, h, timeCost, flops);
return timeCost;
}
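
// profileLLM: run one prefill pass over `prefill_len` dummy tokens (id 200)
// with tracing enabled, reset the model state afterwards, and return the
// prefill latency in ms.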

float profileLLM(Llm* llm, int prefill_len) {
llm->trace(true);
std::vector<int> test_prompt(prefill_len, 200);
MNN::Timer _t;
llm->forward(test_prompt, true);
float timeCost = _t.durationInUs() / 1000.0f;
llm->trace(false);
llm->reset();
MNN_PRINT("[%d], LLM Prefill time: %f ms\n", prefill_len, timeCost);
return timeCost;
}
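
// init_tune: create an executor for the requested backend with a low-precision,
// low-memory BackendConfig and the given thread count; prints a message and
// returns nullptr if the executor cannot be created.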

std::shared_ptr<Executor> init_tune(MNNForwardType type, int thread) {
int precision = (int)MNN::BackendConfig::Precision_Low;
int memory = (int)MNN::BackendConfig::Memory_Low;
MNN::BackendConfig config;
config.precision = (MNN::BackendConfig::PrecisionMode)precision;
config.memory = (MNN::BackendConfig::MemoryMode)memory;
auto exe = MNN::Express::Executor::newExecutor(type, config, thread);
if (exe == nullptr) {
MNN_PRINT("Can't create executor with type:%d, exit!\n", type);
}
return exe;
}
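
// tune: compare CPU and OpenCL across several prefill lengths. hidden_size and
// intermediate_size are hard-coded for the model under test; the simulated
// GEMM probes are commented out and only the OpenCL prefill measurement runs.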

void tune(Llm* llm) {
// void tune() {
// problems for direct tuning: 1. low frequency, 2. it is unclear whether the temp cache needs to be stored for OpenCL.
// test: setCache for OpenCL
float inf = 10000.f;
int hidden_size = 1536;
int intermediate_size = hidden_size * 3;

std::vector<int> prefill_len_list = {100, 200, 400, 600, 800, 1000, 1500};
// std::vector<int> prefill_len_list = {100};

float qkv_sim_cpu = inf;
float up_proj_sim_cpu = inf;
float down_proj_sim_cpu = inf;
float prefill_llm_sim_cpu = inf;
auto cpu_exe = init_tune(backend_type_convert("cpu"), 4);
for (auto prefill_len : prefill_len_list) {
MNN::Express::ExecutorScope scope(cpu_exe);
// qkv_sim_cpu = profileMatMul(prefill_len, hidden_size, hidden_size);
// up_proj_sim_cpu = profileMatMul(prefill_len, hidden_size, intermediate_size);
// down_proj_sim_cpu = profileMatMul(prefill_len, intermediate_size, hidden_size);
// prefill_llm_sim_cpu = profileLLM(llm, prefill_len);
}

float qkv_sim_opencl = inf;
float up_proj_sim_opencl = inf;
float down_proj_sim_opencl = inf;
float prefill_llm_sim_opencl = inf;
auto opencl_exe = init_tune(backend_type_convert("opencl"), 68);
if (opencl_exe != nullptr) {
for (auto prefill_len : prefill_len_list) {
MNN::Express::ExecutorScope scope(opencl_exe);
// qkv_sim_opencl = profileMatMul(prefill_len, hidden_size, hidden_size);
// up_proj_sim_opencl = profileMatMul(prefill_len, hidden_size, intermediate_size);
// down_proj_sim_opencl = profileMatMul(prefill_len, intermediate_size, hidden_size);
prefill_llm_sim_opencl = profileLLM(llm, prefill_len);
}
}
}
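
// A possible follow-up (not part of this commit; names are hypothetical): once
// both backends have been measured, record the faster one per prefill length,
// e.g.
//
//   struct TunePoint { int prefill_len; bool use_opencl; };
//   std::vector<TunePoint> plan;
//   plan.push_back({prefill_len, prefill_llm_sim_opencl < prefill_llm_sim_cpu});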

int main(int argc, const char* argv[]) {
if (argc < 2) {
std::cout << "Usage: " << argv[0] << " config.json [performance.txt]" << std::endl;
return 0;
}
std::string config_path = argv[1];
std::cout << "config path is " << config_path << std::endl;
std::unique_ptr<Llm> llm(Llm::createLLM(config_path));
{
AUTOTIME;
llm->load();
}
{
AUTOTIME;
trace_prepare(llm.get());
}
tune(llm.get());
// tune();
// std::string prompt_file = argv[2];
// std::unique_ptr<std::ofstream> perfOS(nullptr);
// if (argc == 4) { perfOS.reset(new std::ofstream(argv[3])); }
return 0;
}
