Consolidate CPU & XPU IPEX optimisations, & implement LLM optimisations
Steve-Tech committed Sep 19, 2024
1 parent a04072e commit a62344a
Showing 1 changed file with 9 additions and 9 deletions.
18 changes: 9 additions & 9 deletions fastchat/model/model_adapter.py
@@ -366,22 +366,22 @@ def load_model(
     # Load model
     model, tokenizer = adapter.load_model(model_path, kwargs)
 
-    if (
-        device == "cpu"
-        and kwargs["torch_dtype"] is torch.bfloat16
-        and CPU_ISA is not None
-    ):
-        model = ipex.optimize(model, dtype=kwargs["torch_dtype"])
-
     if (device == "cuda" and num_gpus == 1 and not cpu_offloading) or device in (
         "mps",
         "xpu",
         "npu",
     ):
         model.to(device)
 
-    if device == "xpu":
-        model = torch.xpu.optimize(model, dtype=kwargs["torch_dtype"], inplace=True)
+    if (
+        device == "cpu"
+        and kwargs["torch_dtype"] is torch.bfloat16
+        and CPU_ISA is not None
+    ) or device == "xpu":
+        if "llm" in dir(ipex):
+            model = ipex.llm.optimize(model, dtype=kwargs["torch_dtype"], device=device)
+        else:
+            model = ipex.optimize(model, dtype=kwargs["torch_dtype"])
 
     if debug:
         print(model)
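
For readers who want to try the new path outside FastChat, below is a minimal standalone sketch of the same fallback logic. It assumes intel_extension_for_pytorch and transformers are installed and uses "facebook/opt-125m" purely as a small placeholder model; neither is part of this commit.

# Minimal sketch of the consolidated IPEX path (assumption: intel_extension_for_pytorch
# and transformers are installed; "facebook/opt-125m" is only a placeholder model).
import torch
import intel_extension_for_pytorch as ipex
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "xpu" if hasattr(torch, "xpu") and torch.xpu.is_available() else "cpu"
dtype = torch.bfloat16

tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m", torch_dtype=dtype)
model = model.to(device).eval()

# Newer IPEX releases expose LLM-specific optimisations under ipex.llm;
# otherwise fall back to the generic ipex.optimize, mirroring the diff above.
if "llm" in dir(ipex):
    model = ipex.llm.optimize(model, dtype=dtype, device=device)
else:
    model = ipex.optimize(model, dtype=dtype)

inputs = tokenizer("Hello, my name is", return_tensors="pt").to(device)
with torch.inference_mode():
    output = model.generate(**inputs, max_new_tokens=16)
print(tokenizer.decode(output[0], skip_special_tokens=True))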
