From 2e76c97470bebb9c95b330b7465c6e87b98cdc4a Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Tue, 17 Dec 2024 09:37:47 +0100 Subject: [PATCH] test --- examples/cuda_tgi_llama.yaml | 2 +- optimum_benchmark/backends/py_txi/backend.py | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/examples/cuda_tgi_llama.yaml b/examples/cuda_tgi_llama.yaml index 16d2f5f0..a32060b1 100644 --- a/examples/cuda_tgi_llama.yaml +++ b/examples/cuda_tgi_llama.yaml @@ -16,7 +16,7 @@ backend: device: cuda device_ids: 0 cuda_graphs: 0 # remove for better perf but bigger memory footprint - no_weights: false + no_weights: true model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 scenario: diff --git a/optimum_benchmark/backends/py_txi/backend.py b/optimum_benchmark/backends/py_txi/backend.py index 014af25f..55aecab9 100644 --- a/optimum_benchmark/backends/py_txi/backend.py +++ b/optimum_benchmark/backends/py_txi/backend.py @@ -59,7 +59,10 @@ def create_no_weights_model(self) -> None: with fast_weights_init(): # unlike Transformers, TXI won't accept any missing tensors so we need to materialize the model self.pretrained_model = self.automodel_loader.from_pretrained( - model_path, **self.config.model_kwargs, device_map="auto", _fast_init=False + model_path, + _fast_init=False, + device_map="auto", + **self.config.model_kwargs, ) save_model(model=self.pretrained_model, filename=model_path / "model.safetensors", metadata={"format": "pt"}) @@ -72,12 +75,8 @@ def create_no_weights_model(self) -> None: self.generation_config.save_pretrained(save_directory=model_path) def load_model_with_no_weights(self) -> None: - original_volumes, self.config.volumes = ( - self.config.volumes, - {self.tmpdir.name: {"bind": "/data", "mode": "rw"}}, - ) + self.config.volumes = {self.tmpdir.name: {"bind": "/data", "mode": "rw"}} self.load_model_from_pretrained() - self.config.volumes = original_volumes def load_model_from_pretrained(self) -> None: if self.config.task in TEXT_GENERATION_TASKS: