From 76371efe45509c39a589eefc926666cf1aab044d Mon Sep 17 00:00:00 2001
From: Katherine Yang
Date: Wed, 13 Dec 2023 18:20:12 -0800
Subject: [PATCH] addressed comments and local paths

---
 Popular_Models_Guide/Llama2/trtllm_guide.md | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/Popular_Models_Guide/Llama2/trtllm_guide.md b/Popular_Models_Guide/Llama2/trtllm_guide.md
index 2118f0fa..f9759b12 100644
--- a/Popular_Models_Guide/Llama2/trtllm_guide.md
+++ b/Popular_Models_Guide/Llama2/trtllm_guide.md
@@ -62,9 +62,9 @@ git submodule update --init --recursive
 ```bash
 docker run --rm -it --net host --shm-size=2g \
     --ulimit memlock=-1 --ulimit stack=67108864 --gpus all \
-    -v $PWD/tensorrtllm_backend:/tensorrtllm_backend \
-    -v $PWD/Llama-2-7b-hf:/Llama-2-7b-hf \
-    -v $PWD/engines:/engines \
+    -v your_path_to/tensorrtllm_backend:/tensorrtllm_backend \
+    -v your_path_to/Llama-2-7b-hf:/Llama-2-7b-hf \
+    -v your_path_to/engines:/engines \
     nvcr.io/nvidia/tritonserver:23.11-trtllm-python-py3
 
 # Install Sentencepiece
@@ -141,7 +141,6 @@ To run our Llama2-7B model, you will need to:
     ```bash
     mkdir -p /opt/tritonserver/model_repository
     cp -r /tensorrtllm_backend/all_models/inflight_batcher_llm/* /opt/tritonserver/model_repository/.
-    rm -r /opt/tritonserver/model_repository/tensorrt_llm_bls
     ```
 
 2. Modify config.pbtxt for the preprocessing, postprocessing and processing steps.
@@ -195,7 +194,7 @@ docker run --rm -it --net host --shm-size=2g \
     -v /path/to/tensorrtllm_backend:/tensorrtllm_backend \
     -v /path/to/Llama2/repo:/Llama-2-7b-hf \
     -v /path/to/engines:/engines \
-    nvcr.io/nvidia/tritonserver:23.10-py3-sdk
+    nvcr.io/nvidia/tritonserver:23.11-py3-sdk
 # Install extra dependencies for the script
 pip3 install transformers sentencepiece
 python3 /tensorrtllm_backend/inflight_batcher_llm/client/inflight_batcher_llm_client.py --request-output-len 200 --tokenizer_type llama --tokenizer_dir /Llama-2-7b-hf