From 9dfd3fda52925d9a654e0a5c87c442a7d9ccbe3e Mon Sep 17 00:00:00 2001 From: Katherine Yang <80359429+jbkyang-nvi@users.noreply.github.com> Date: Wed, 8 Nov 2023 11:15:47 -0800 Subject: [PATCH] Update Popular_Models_Guide/Llama2/trtllm_guide.md Co-authored-by: Hyunjae Woo <107147848+nv-hwoo@users.noreply.github.com> --- Popular_Models_Guide/Llama2/trtllm_guide.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Popular_Models_Guide/Llama2/trtllm_guide.md b/Popular_Models_Guide/Llama2/trtllm_guide.md index 6b4ef7b2..6f4eb7e3 100644 --- a/Popular_Models_Guide/Llama2/trtllm_guide.md +++ b/Popular_Models_Guide/Llama2/trtllm_guide.md @@ -136,7 +136,12 @@ You can test the results of the run with: ```bash # Using the SDK container as an example -docker run --rm -it --net host --shm-size=2g --ulimit memlock=-1 --ulimit stack=67108864 --gpus all -v /path/to/tensorrtllm_backend:/tensorrtllm_backend -v /path/to/Llama2/repo:/Llama-2-7b-hf -v /path/to/engines:/engines nvcr.io/nvidia/tritonserver:23.10-py3-sdk +docker run --rm -it --net host --shm-size=2g \ + --ulimit memlock=-1 --ulimit stack=67108864 --gpus all \ + -v /path/to/tensorrtllm_backend:/tensorrtllm_backend \ + -v /path/to/Llama2/repo:/Llama-2-7b-hf \ + -v /path/to/engines:/engines \ + nvcr.io/nvidia/tritonserver:23.10-py3-sdk # install extra dependencies for the script pip3 install transformers sentencepiece python3 /tensorrtllm_backend/inflight_batcher_llm/client/inflight_batcher_llm_client.py --request-output-len 200 --tokenizer_type llama --tokenizer_dir /Llama-2-7b-hf