From 91e95cd0ebc011d4c46e18941c4c5fa578997f84 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Tue, 10 Dec 2024 16:54:41 +0100 Subject: [PATCH] delete cache --- .github/workflows/test_cli_cuda_tensorrt_llm.yaml | 4 +++- examples/cuda_trt_llama.yaml | 7 +++---- optimum_benchmark/backends/tensorrt_llm/backend.py | 1 - optimum_benchmark/backends/tensorrt_llm/config.py | 1 - 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/workflows/test_cli_cuda_tensorrt_llm.yaml b/.github/workflows/test_cli_cuda_tensorrt_llm.yaml index 60a616d6..c75aac92 100644 --- a/.github/workflows/test_cli_cuda_tensorrt_llm.yaml +++ b/.github/workflows/test_cli_cuda_tensorrt_llm.yaml @@ -56,7 +56,9 @@ jobs: contains( github.event.pull_request.labels.*.name, 'examples') }} name: Run examples - run: pytest tests/test_examples.py -x -s -k "cli and cuda and trt" + run: | + huggingface-cli delete-cache + pytest tests/test_examples.py -x -s -k "cli and cuda and trt" cli_cuda_tensorrt_llm_multi_gpu_tests: if: ${{ diff --git a/examples/cuda_trt_llama.yaml b/examples/cuda_trt_llama.yaml index 26f35b2c..c483fc2f 100644 --- a/examples/cuda_trt_llama.yaml +++ b/examples/cuda_trt_llama.yaml @@ -15,11 +15,10 @@ launcher: backend: device: cuda device_ids: 0 - force_export: true - model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 - max_prompt_length: 64 - max_new_tokens: 32 max_batch_size: 4 + max_new_tokens: 32 + max_prompt_length: 64 + model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 scenario: input_shapes: diff --git a/optimum_benchmark/backends/tensorrt_llm/backend.py b/optimum_benchmark/backends/tensorrt_llm/backend.py index f46ce6c8..a05187c3 100644 --- a/optimum_benchmark/backends/tensorrt_llm/backend.py +++ b/optimum_benchmark/backends/tensorrt_llm/backend.py @@ -46,7 +46,6 @@ def load_trtmodel_from_pretrained(self) -> None: max_batch_size=self.config.max_batch_size, max_new_tokens=self.config.max_new_tokens, max_beam_width=self.config.max_beam_width, - force_export=self.config.force_export, **self.config.model_kwargs, ) diff --git a/optimum_benchmark/backends/tensorrt_llm/config.py b/optimum_benchmark/backends/tensorrt_llm/config.py index 4fc83f11..d7f4b1cb 100644 --- a/optimum_benchmark/backends/tensorrt_llm/config.py +++ b/optimum_benchmark/backends/tensorrt_llm/config.py @@ -18,7 +18,6 @@ class TRTLLMConfig(BackendConfig): pp: int = 1 use_fp8: bool = False dtype: str = "float16" - force_export: bool = False optimization_level: int = 2 use_cuda_graph: bool = False