Patch notes (free-form text ahead of the first "diff --git" header; patch tools
skip it).

* Deletes the android-build workflow.
* Narrows the build-test CI matrix to windows-latest / chatglm-6b, merges the
  Windows download and test steps into a single step, and adds `ls` diagnostics.
* script/download_model.ps1: switches to Invoke-WebRequest and a per-model block
  count (32 when the model name contains '7b', otherwise 28). Blocks are fetched
  starting at index 0, the same start index script/download_model.sh uses, so
  block_0.mnn is no longer skipped.
* script/download_model.sh: adds a usage check and the same per-model block count.
* src/llm.cpp: adds printf diagnostics. The token-id dump passes `id` to printf
  so the "%d" conversion has a matching argument.

NOTE(review): the "index" blob hashes for script/download_model.ps1 and
src/llm.cpp predate the two corrections above and were not recomputed.
NOTE(review): leading whitespace inside the hunks and the position of blank
context lines were reconstructed from a whitespace-collapsed copy of this
patch - verify against the target files before applying.

diff --git a/.github/workflows/android-build.yml b/.github/workflows/android-build.yml
deleted file mode 100755
index 5a436b25..00000000
--- a/.github/workflows/android-build.yml
+++ /dev/null
@@ -1,38 +0,0 @@
-name: android-build
-on:
-  push:
-    branches:
-      - master
-      - 'feature/**'
-    paths:
-      - 'src/**'
-      - 'android/**'
-      - '.github/workflows/android-build.yml'
-  pull_request:
-    branches: [master]
-    paths:
-      - 'src/**'
-      - 'android/**'
-      - '.github/workflows/android-build.yml'
-
-concurrency:
-  group: android-${{ github.ref }}
-  cancel-in-progress: true
-permissions:
-  contents: read
-
-jobs:
-  android_adb_build:
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v3
-    - name: build
-      run: ./script/android_build.sh
-
-  android_app_build:
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v3
-    - name: build
-      run: ./script/android_app_build.sh
-
diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
index f1ff7853..ad501d1b 100644
--- a/.github/workflows/build-test.yml
+++ b/.github/workflows/build-test.yml
@@ -22,7 +22,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-latest, macos-latest, windows-latest]
+        os: [windows-latest]
 
     steps:
     - uses: actions/checkout@v3
@@ -55,8 +55,8 @@ jobs:
       PACAGE_FILE: ${{ matrix.os }}-package.zip
     strategy:
       matrix:
-        os: [ubuntu-latest, macos-latest, windows-latest]
-        model: [chatglm-6b, chatglm2-6b, codegeex2-6b, qwen-7b-chat, baichuan2-7b-chat, llama2-7b-chat]
+        os: [windows-latest]
+        model: [chatglm-6b]
 
     steps:
     - uses: actions/download-artifact@v3
@@ -70,16 +70,16 @@ jobs:
         unzip $PACAGE_FILE
         cd $PACAGE_DIR
         ./script/model_test.sh ${{ matrix.model }}
-    - name: windows-download
+    - name: windows-test
       if: matrix.os == 'windows-latest'
       run: |
         cd workspace
         7z x windows-package.zip
         cd windows-package
+        ls
         echo ${{ matrix.model }}
-        ./script/model_download.ps1 ${{ matrix.model }}
-    - name: windows-test
-      if: matrix.os == 'windows-latest'
-      run: |
+        ./script/download_model.ps1 ${{ matrix.model }}
+        ls
         cd build
-        .\Release\cli_demo -m ..\${{ matrix.model }}
\ No newline at end of file
+        .\Release\cli_demo -m ..\${{ matrix.model }}
+        ls
\ No newline at end of file
diff --git a/script/download_model.ps1 b/script/download_model.ps1
index 7dc0ce8f..64b11c83 100644
--- a/script/download_model.ps1
+++ b/script/download_model.ps1
@@ -3,10 +3,13 @@ param(
 )
 mkdir $model
 cd $model
-wget -Uri https://github.com/wangzhaode/mnn-llm/releases/download/$model-mnn/embedding.mnn -OutFile embedding.mnn
-wget -Uri https://github.com/wangzhaode/mnn-llm/releases/download/$model-mnn/lm.mnn -OutFile lm.mnn
-for($i=1; $i -lt 32; $i=$i+1)
-{
-    wget -Uri https://github.com/wangzhaode/mnn-llm/releases/download/$model-mnn/block_$i.mnn -OutFile block_$i.mnn
+$block_num = 28
+if ($model.Contains('7b')) {
+    $block_num = 32
 }
-cd ..
+Invoke-WebRequest -Uri https://github.com/wangzhaode/mnn-llm/releases/download/$model-mnn/embedding.mnn -OutFile embedding.mnn
+Invoke-WebRequest -Uri https://github.com/wangzhaode/mnn-llm/releases/download/$model-mnn/lm.mnn -OutFile lm.mnn
+for ($i=0; $i -lt $block_num; $i=$i+1) {
+    Invoke-WebRequest -Uri https://github.com/wangzhaode/mnn-llm/releases/download/$model-mnn/block_$i.mnn -OutFile block_$i.mnn
+}
+cd ..
\ No newline at end of file
diff --git a/script/download_model.sh b/script/download_model.sh
index 3977f29b..5a8c5394 100755
--- a/script/download_model.sh
+++ b/script/download_model.sh
@@ -1,10 +1,20 @@
+if [ $# -lt 1 ]; then
+    echo 'Usage: ./download_model.sh $model'
+    exit 1
+fi
+
 model=$1
 mkdir $model
 cd $model
+is_7b=`echo $model | grep '7b'`
+block_num=27
+if [ $is_7b ]; then
+    block_num=31
+fi
 # download models
 wget -c -nv https://github.com/wangzhaode/mnn-llm/releases/download/$model-mnn/embedding.mnn
 wget -c -nv https://github.com/wangzhaode/mnn-llm/releases/download/$model-mnn/lm.mnn
-for i in `seq 0 31`
+for i in `seq 0 $block_num`
 do
     wget -c -nv https://github.com/wangzhaode/mnn-llm/releases/download/$model-mnn/block_$i.mnn
 done
diff --git a/src/llm.cpp b/src/llm.cpp
index a133f903..80e0ef24 100644
--- a/src/llm.cpp
+++ b/src/llm.cpp
@@ -94,12 +94,15 @@ Llm* Llm::createLLM(const std::string& path) {
 }
 
 std::string Llm::response(const std::string& query, std::ostream* os) {
+    printf("response\n");
     // init status
     if (is_single_) {
         key_value_shape_.insert(key_value_shape_.begin(), layer_nums_);
         past_key_values_.push_back(_Input(key_value_shape_, NCHW));
     } else {
         for (int i = 0; i < layer_nums_; i++) {
+            printf("past_key_values_ %d\n", i);
+            fflush(stdout);
             past_key_values_.push_back(_Input(key_value_shape_, NCHW));
         }
     }
@@ -225,6 +228,7 @@ int Llm::forward(const std::vector& input_ids) {
     }
     all_seq_len_ += seq_len;
     gen_seq_len_++;
+    printf("id = %d\n", id);
     return id;
 }
 
@@ -290,6 +294,7 @@ std::vector Chatglm_6b::tokenizer(const std::string& query) {
     context_len_ = ids.size();
     ids.push_back(130001);
     ids.push_back(130004);
+    printf("ids = ["); for (auto id : ids) printf("%d, ", id); printf("]\n");
     return ids;
 }
 