forked from ggerganov/llama.cpp
-
Notifications
You must be signed in to change notification settings - Fork 1
63 lines (54 loc) · 1.73 KB
/
server-test.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# Server test scenario
#
# Builds the project with CMake on an Ubuntu runner, downloads a small GGUF
# test model, starts the example HTTP server in the background, waits for its
# /health endpoint to report "ok", and then runs the behave scenarios found in
# examples/server/tests.
name: Server Integration Tests

on: [push, pull_request]

jobs:
  ubuntu-latest-cmake:
    runs-on: ubuntu-latest

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Dependencies
        id: depends
        run: |
          sudo apt-get update
          # -y: the runner has no TTY, so an interactive prompt would abort.
          sudo apt-get install -y build-essential

      - name: Build
        id: cmake_build
        run: |
          mkdir build
          cd build
          # Configure from the repository root (the parent of build/);
          # without a source directory there is nothing for --build to use.
          cmake ..
          cmake --build . --config Release -j $(nproc)

      - name: Tests dependencies
        id: test_dependencies
        run: |
          pip install -r examples/server/tests/requirements.txt

      - name: Download test model
        id: download_model
        run: |
          ./scripts/hf.sh --repo TheBloke/phi-2-GGUF --file phi-2.Q4_K_M.gguf

      - name: Server Integration Tests
        id: server_integration_test
        run: |
          # Launch the server in the background so the script can go on to
          # poll it. Batch threads follow the runner's core count.
          ./build/bin/server \
            -m phi-2.Q4_K_M.gguf \
            --ctx-size 2048 \
            --parallel 4 \
            --n-predict 2048 \
            --batch-size 512 \
            --threads-batch "$(nproc)" \
            --alias phi-2 \
            --embedding \
            --cont-batching &

          # Poll /health with a linearly growing delay (2s, 4s, ...) and give
          # up after max_attempts failed probes.
          max_attempts=10
          attempts=${max_attempts}
          echo "waiting for server to be ready..."
          until curl --silent --show-error --fail "http://localhost:8080/health" \
            | jq -r '.status' | grep ok; do
            attempts=$(( attempts - 1 ))
            if [ "${attempts}" -eq 0 ]; then
              echo "Server did not startup" >&2
              exit 1
            fi
            sleep $(( (max_attempts - attempts) * 2 ))
          done

          cd examples/server/tests
          behave