Skip to content

Commit

Permalink
Fix Llama 3 OOM (out-of-memory) error
Browse files — browse the repository at this point in the history
Signed-off-by: Kaihui-intel <[email protected]>
  • Loading branch information
Kaihui-intel committed Sep 29, 2024
1 parent dea8512 commit 44c312d
Showing 1 changed file with 5 additions and 0 deletions.
5 changes: 5 additions & 0 deletions neural_compressor/transformers/quantization/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,11 @@ def convert_to_quantized_model(model, config, device="cpu"):
if orig_dtype != torch.float32:
q_model.to(dtype=orig_dtype)

if config.use_layer_wise and not (q_model.device == device or q_model.device.type == device):
logger.warning(
"Do not convert device to avoid out of memory. Recommend using saved quantized model to inference.")
return q_model

return q_model.to(device)


Expand Down

0 comments on commit 44c312d

Please sign in to comment.