Skip to content

Commit

Permalink
Disable `no_weights` on TGI CUDA for now
Browse files — browse the repository at this point in the history
  • Loading branch information
IlyasMoutawwakil committed Dec 16, 2024
1 parent 3b0138b commit a8c4159
Show file tree
Hide file tree
Showing 4 changed files with 7 additions and 16 deletions.
3 changes: 1 addition & 2 deletions examples/cuda_tgi_llama.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,8 @@ backend:
device: cuda
device_ids: 0
cuda_graphs: 0 # remove for better perf but bigger memory footprint
no_weights: true
no_weights: false
model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
image: ghcr.io/huggingface/text-generation-inference:2.4.1

scenario:
input_shapes:
Expand Down
11 changes: 6 additions & 5 deletions optimum_benchmark/backends/py_txi/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,16 +46,12 @@ def download_pretrained_model(self) -> None:

def create_no_weights_model(self) -> None:
self.no_weights_model = os.path.join(self.tmpdir.name, "no_weights_model")
filename = os.path.join(self.no_weights_model, "model.safetensors")
os.makedirs(self.no_weights_model, exist_ok=True)

self.pretrained_config.save_pretrained(save_directory=self.no_weights_model)
self.pretrained_processor.save_pretrained(save_directory=self.no_weights_model)
if self.config.task in TEXT_GENERATION_TASKS:
self.generation_config.eos_token_id = None
self.generation_config.pad_token_id = None
self.generation_config.save_pretrained(save_directory=self.no_weights_model)

filename = os.path.join(self.no_weights_model, "model.safetensors")
save_model(model=torch.nn.Linear(1, 1), filename=filename, metadata={"format": "pt"})
with fast_weights_init():
# unlike Transformers, TXI won't accept any missing tensors so we need to materialize the model
Expand All @@ -66,6 +62,11 @@ def create_no_weights_model(self) -> None:
del self.pretrained_model
torch.cuda.empty_cache()

if self.config.task in TEXT_GENERATION_TASKS:
self.generation_config.eos_token_id = None
self.generation_config.pad_token_id = None
self.generation_config.save_pretrained(save_directory=self.no_weights_model)

def load_model_with_no_weights(self) -> None:
self.config.volumes = {self.no_weights_model: {"bind": "/no_weights_model/", "mode": "rw"}}
original_model, self.config.model = self.config.model, "/no_weights_model/"
Expand Down
4 changes: 0 additions & 4 deletions tests/configs/cpu_inference_py_txi_gpt2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,3 @@ defaults:
- override backend: py-txi

name: cpu_inference_py_txi_gpt2

backend:
cuda_graphs: 0
image: ghcr.io/huggingface/text-generation-inference:2.4.1
5 changes: 0 additions & 5 deletions tests/configs/cuda_inference_py_txi_gpt2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,8 @@ defaults:
- _base_ # inherits from base config
- _cuda_ # inherits from cuda config
- _inference_ # inherits from inference config
- _no_weights_ # inherits from no weights config
- _gpt2_ # inherits from gpt2 config
- _self_ # hydra 1.1 compatibility
- override backend: py-txi

name: cuda_inference_py_txi_gpt2

backend:
cuda_graphs: 0
image: ghcr.io/huggingface/text-generation-inference:2.4.1

0 comments on commit a8c4159

Please sign in to comment.