# Dockerfile (forked from amazeeio/llama2-lagoon)
# Base image: slim Python 3 on Debian Bookworm
FROM python:3-slim-bookworm
# Hugging Face repository of the model to use
ENV MODEL=TheBloke/openchat-3.5-0106-GGUF
# Exact GGUF filename of the model within that repository
ENV FILENAME=openchat-3.5-0106.Q6_K.gguf
# Chat prompt format the server should apply
ENV CHAT_FORMAT=openchat
# Directory in which to store the downloaded model
ENV DATADIR=/data
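# The start script presumably uses hug_model.py to fetch $FILENAME from the
# $MODEL repository into $DATADIR. A minimal sketch of an equivalent fetch,
# assuming the huggingface_hub CLI is available (the actual logic lives in
# hug_model.py):
#
#   huggingface-cli download TheBloke/openchat-3.5-0106-GGUF \
#       openchat-3.5-0106.Q6_K.gguf --local-dir /data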
# Request a cuBLAS (CUDA) build of llama.cpp; this only takes effect in
# CUDA-enabled builds, so the OpenBLAS CMAKE_ARGS below govern in this image
ENV LLAMA_CUBLAS=1
# Address the server binds to
ENV HOST=0.0.0.0
# Force llama-cpp-python to (re)build llama.cpp from source with CMake
ENV FORCE_CMAKE=1
# CMake arguments: enable BLAS with OpenBLAS as the backend
ENV CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS"
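# FORCE_CMAKE and CMAKE_ARGS are read by llama-cpp-python's build backend, so
# the pip install further below compiles llama.cpp with OpenBLAS. A hedged
# sketch of the same build done by hand, assuming llama-cpp-python is among
# the packages in requirements.txt:
#
#   CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" FORCE_CMAKE=1 \
#       pip install --no-cache-dir llama-cpp-python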
# Install build and BLAS dependencies, then clean the apt lists in the same
# layer so they do not bloat the image
RUN apt-get update && apt-get upgrade -y && apt-get install -y \
    python3 \
    python3-pip \
    ninja-build \
    libopenblas-dev \
    pkg-config \
    build-essential \
    && rm -rf /var/lib/apt/lists/*
# Work from a dedicated application directory
WORKDIR /app
# Copy in the dependency list, the server start script, and the model helper
COPY ./requirements.txt /app/requirements.txt
COPY ./start-llm.sh /app/start-llm.sh
COPY ./hug_model.py /app/hug_model.py
# Install Python dependencies; any source builds here pick up the
# FORCE_CMAKE/CMAKE_ARGS settings above
RUN pip install --no-cache-dir -r requirements.txt
# Expose a port for the server
EXPOSE 8000
# Run the server start script
CMD ["/app/start-llm.sh"]
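# Once the container is running, the llama-cpp-python server exposes an
# OpenAI-compatible HTTP API on port 8000 (assuming start-llm.sh launches it
# with default settings). Example request:
#
#   curl http://localhost:8000/v1/chat/completions \
#       -H "Content-Type: application/json" \
#       -d '{"messages": [{"role": "user", "content": "Hello"}]}'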