From 2e76c97470bebb9c95b330b7465c6e87b98cdc4a Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil <moutawwakil.ilyas.tsi@gmail.com>
Date: Tue, 17 Dec 2024 09:37:47 +0100
Subject: [PATCH] py_txi: keep tmpdir volume after no-weights load; enable no_weights in TGI example

---
 examples/cuda_tgi_llama.yaml                 |  2 +-
 optimum_benchmark/backends/py_txi/backend.py | 11 +++++------
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/examples/cuda_tgi_llama.yaml b/examples/cuda_tgi_llama.yaml
index 16d2f5f0..a32060b1 100644
--- a/examples/cuda_tgi_llama.yaml
+++ b/examples/cuda_tgi_llama.yaml
@@ -16,7 +16,7 @@ backend:
   device: cuda
   device_ids: 0
   cuda_graphs: 0 # remove for better perf but bigger memory footprint
-  no_weights: false
+  no_weights: true
   model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
 
 scenario:
diff --git a/optimum_benchmark/backends/py_txi/backend.py b/optimum_benchmark/backends/py_txi/backend.py
index 014af25f..55aecab9 100644
--- a/optimum_benchmark/backends/py_txi/backend.py
+++ b/optimum_benchmark/backends/py_txi/backend.py
@@ -59,7 +59,10 @@ def create_no_weights_model(self) -> None:
         with fast_weights_init():
             # unlike Transformers, TXI won't accept any missing tensors so we need to materialize the model
             self.pretrained_model = self.automodel_loader.from_pretrained(
-                model_path, **self.config.model_kwargs, device_map="auto", _fast_init=False
+                model_path,
+                _fast_init=False,
+                device_map="auto",
+                **self.config.model_kwargs,
             )
 
         save_model(model=self.pretrained_model, filename=model_path / "model.safetensors", metadata={"format": "pt"})
@@ -72,12 +75,8 @@ def create_no_weights_model(self) -> None:
             self.generation_config.save_pretrained(save_directory=model_path)
 
     def load_model_with_no_weights(self) -> None:
-        original_volumes, self.config.volumes = (
-            self.config.volumes,
-            {self.tmpdir.name: {"bind": "/data", "mode": "rw"}},
-        )
+        self.config.volumes = {self.tmpdir.name: {"bind": "/data", "mode": "rw"}}
         self.load_model_from_pretrained()
-        self.config.volumes = original_volumes
 
     def load_model_from_pretrained(self) -> None:
         if self.config.task in TEXT_GENERATION_TASKS: