addressed comments and local paths
jbkyang-nvi committed Dec 14, 2023
1 parent f0cbc79 commit 76371ef
Showing 1 changed file with 4 additions and 5 deletions.
9 changes: 4 additions & 5 deletions Popular_Models_Guide/Llama2/trtllm_guide.md
@@ -62,9 +62,9 @@ git submodule update --init --recursive
```bash
docker run --rm -it --net host --shm-size=2g \
--ulimit memlock=-1 --ulimit stack=67108864 --gpus all \
-  -v $PWD/tensorrtllm_backend:/tensorrtllm_backend \
-  -v $PWD/Llama-2-7b-hf:/Llama-2-7b-hf \
-  -v $PWD/engines:/engines \
+  -v your_path_to/tensorrtllm_backend:/tensorrtllm_backend \
+  -v your_path_to/Llama-2-7b-hf:/Llama-2-7b-hf \
+  -v your_path_to/engines:/engines \
nvcr.io/nvidia/tritonserver:23.11-trtllm-python-py3

# Install Sentencepiece
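
To make the placeholder mounts introduced above concrete, a filled-in invocation might look like the sketch below. The /home/user/... host paths are purely illustrative; point the three mounts at wherever your clone of tensorrtllm_backend, the Llama-2-7b-hf checkpoint, and the engine output directory actually live.

```bash
# Illustrative paths only: substitute your own locations for the three host-side mounts.
docker run --rm -it --net host --shm-size=2g \
  --ulimit memlock=-1 --ulimit stack=67108864 --gpus all \
  -v /home/user/tensorrtllm_backend:/tensorrtllm_backend \
  -v /home/user/Llama-2-7b-hf:/Llama-2-7b-hf \
  -v /home/user/engines:/engines \
  nvcr.io/nvidia/tritonserver:23.11-trtllm-python-py3
```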
@@ -141,7 +141,6 @@ To run our Llama2-7B model, you will need to:
```bash
mkdir -p /opt/tritonserver/model_repository
cp -r /tensorrtllm_backend/all_models/inflight_batcher_llm/* /opt/tritonserver/model_repository/.
- rm -r /opt/tritonserver/model_repository/tensorrt_llm_bls
```

2. Modify config.pbtxt for the preprocessing, postprocessing and processing steps.
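
As a sketch of what step 2 above can involve: the backend repo ships a tools/fill_template.py helper that substitutes the ${...} placeholders in each config.pbtxt. The variable names below (tokenizer_dir, tokenizer_type, gpt_model_path, gpt_model_type) are assumptions based on the 23.11-era inflight_batcher_llm templates and may differ between releases, so check them against the placeholders actually present in your copies of the configs.

```bash
# Assumed helper and variable names; verify against the placeholders in your config.pbtxt files.
cd /opt/tritonserver/model_repository
python3 /tensorrtllm_backend/tools/fill_template.py -i preprocessing/config.pbtxt \
  tokenizer_dir:/Llama-2-7b-hf,tokenizer_type:llama
python3 /tensorrtllm_backend/tools/fill_template.py -i postprocessing/config.pbtxt \
  tokenizer_dir:/Llama-2-7b-hf,tokenizer_type:llama
python3 /tensorrtllm_backend/tools/fill_template.py -i tensorrt_llm/config.pbtxt \
  gpt_model_type:inflight_fused_batching,gpt_model_path:/engines
```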
@@ -195,7 +194,7 @@ docker run --rm -it --net host --shm-size=2g \
-v /path/to/tensorrtllm_backend:/tensorrtllm_backend \
-v /path/to/Llama2/repo:/Llama-2-7b-hf \
-v /path/to/engines:/engines \
-  nvcr.io/nvidia/tritonserver:23.10-py3-sdk
+  nvcr.io/nvidia/tritonserver:23.11-py3-sdk
# Install extra dependencies for the script
pip3 install transformers sentencepiece
python3 /tensorrtllm_backend/inflight_batcher_llm/client/inflight_batcher_llm_client.py --request-output-len 200 --tokenizer_type llama --tokenizer_dir /Llama-2-7b-hf
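
If the Python client is not convenient, a quick smoke test can also go through Triton's HTTP generate endpoint. This assumes the server started earlier is listening on localhost:8000 and that the ensemble model copied from the templates exposes text_input, max_tokens, bad_words, and stop_words request fields; field names can vary between backend releases.

```bash
# Assumed endpoint and request fields; adjust to match the loaded ensemble's config.
curl -X POST localhost:8000/v2/models/ensemble/generate -d \
  '{"text_input": "What is machine learning?", "max_tokens": 32, "bad_words": "", "stop_words": ""}'
```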
