
Commit 2e76c97

test
IlyasMoutawwakil committed Dec 17, 2024
1 parent 9d5cf6c commit 2e76c97
Showing 2 changed files with 6 additions and 7 deletions.
examples/cuda_tgi_llama.yaml (1 addition, 1 deletion)
@@ -16,7 +16,7 @@ backend:
   device: cuda
   device_ids: 0
   cuda_graphs: 0 # remove for better perf but bigger memory footprint
-  no_weights: false
+  no_weights: true
   model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
 
 scenario:
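Setting no_weights: true tells the py-txi backend to benchmark a randomly initialized copy of the model instead of downloading the real checkpoint; the backend materializes that copy itself (see create_no_weights_model in the Python diff below). A minimal sketch of the same idea in plain transformers/safetensors terms, assuming network access for the config and using a hypothetical scratch directory:

from pathlib import Path

from safetensors.torch import save_model
from transformers import AutoConfig, AutoModelForCausalLM

# Hypothetical scratch directory standing in for the backend's tmpdir.
model_path = Path("/tmp/no_weights_model")
model_path.mkdir(parents=True, exist_ok=True)

# Only the config is fetched; the weights are initialized randomly on this machine.
config = AutoConfig.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
model = AutoModelForCausalLM.from_config(config)

# TGI expects a complete checkpoint, so every tensor is materialized and saved.
save_model(model=model, filename=str(model_path / "model.safetensors"), metadata={"format": "pt"})
config.save_pretrained(model_path)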
optimum_benchmark/backends/py_txi/backend.py (5 additions, 6 deletions)
@@ -59,7 +59,10 @@ def create_no_weights_model(self) -> None:
         with fast_weights_init():
             # unlike Transformers, TXI won't accept any missing tensors so we need to materialize the model
             self.pretrained_model = self.automodel_loader.from_pretrained(
-                model_path, **self.config.model_kwargs, device_map="auto", _fast_init=False
+                model_path,
+                _fast_init=False,
+                device_map="auto",
+                **self.config.model_kwargs,
             )
 
         save_model(model=self.pretrained_model, filename=model_path / "model.safetensors", metadata={"format": "pt"})
@@ -72,12 +75,8 @@ def create_no_weights_model(self) -> None:
         self.generation_config.save_pretrained(save_directory=model_path)
 
     def load_model_with_no_weights(self) -> None:
-        original_volumes, self.config.volumes = (
-            self.config.volumes,
-            {self.tmpdir.name: {"bind": "/data", "mode": "rw"}},
-        )
+        self.config.volumes = {self.tmpdir.name: {"bind": "/data", "mode": "rw"}}
         self.load_model_from_pretrained()
-        self.config.volumes = original_volumes
 
     def load_model_from_pretrained(self) -> None:
         if self.config.task in TEXT_GENERATION_TASKS:
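The volumes mapping assigned in load_model_with_no_weights above is a docker-py style bind specification: the temporary directory holding the no-weights checkpoint is mounted read-write at /data inside the TGI container. A rough sketch of what such a binding amounts to, assuming the docker Python package; the image tag, tmpdir, and command here are illustrative, not the backend's actual launch code:

import tempfile

import docker

tmpdir = tempfile.TemporaryDirectory()
# Same shape as self.config.volumes after the change: host path -> bind spec.
volumes = {tmpdir.name: {"bind": "/data", "mode": "rw"}}

client = docker.from_env()
# Roughly: docker run -v <tmpdir>:/data:rw ghcr.io/huggingface/text-generation-inference ...
container = client.containers.run(
    "ghcr.io/huggingface/text-generation-inference:latest",
    command=["--model-id", "/data"],  # illustrative: serve the checkpoint mounted at /data
    volumes=volumes,
    detach=True,
)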
