From b9e1c30d0e314b563c1b7ed59c520ebf743dec9f Mon Sep 17 00:00:00 2001
From: Sayak Paul
Date: Mon, 4 Mar 2024 08:55:30 +0530
Subject: [PATCH] [Docs] more elaborate example for peft `torch.compile` (#7161)

more elaborate example for peft torch.compile
---
 docs/source/en/tutorials/using_peft_for_inference.md | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/docs/source/en/tutorials/using_peft_for_inference.md b/docs/source/en/tutorials/using_peft_for_inference.md
index fb209a7a8f1f..22708b8750cf 100644
--- a/docs/source/en/tutorials/using_peft_for_inference.md
+++ b/docs/source/en/tutorials/using_peft_for_inference.md
@@ -169,7 +169,7 @@ list_adapters_component_wise
 
 If you want to compile your model with `torch.compile` make sure to first fuse the LoRA weights into the base model and unload them.
 
-```py
+```diff
 pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel")
 pipe.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors", adapter_name="toy")
 
@@ -178,12 +178,16 @@ pipe.set_adapters(["pixel", "toy"], adapter_weights=[0.5, 1.0])
 pipe.fuse_lora()
 pipe.unload_lora_weights()
 
-pipe = torch.compile(pipe)
++ pipe.unet.to(memory_format=torch.channels_last)
++ pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
 
 prompt = "toy_face of a hacker with a hoodie, pixel art"
 image = pipe(prompt, num_inference_steps=30, generator=torch.manual_seed(0)).images[0]
 ```
 
+> [!TIP]
+> You can refer to the `torch.compile()` section [here](https://huggingface.co/docs/diffusers/main/en/optimization/torch2.0#torchcompile) and [here](https://huggingface.co/docs/diffusers/main/en/tutorials/fast_diffusion#torchcompile) for more elaborate examples.
+
 ## Fusing adapters into the model
 
 You can use PEFT to easily fuse/unfuse multiple adapters directly into the model weights (both UNet and text encoder) using the [`~diffusers.loaders.LoraLoaderMixin.fuse_lora`] method, which can lead to a speed-up in inference and lower VRAM usage.
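
For reference, the fuse-then-compile flow this patch documents, written out end to end. This is a minimal sketch, not part of the patch: the base checkpoint `stabilityai/stable-diffusion-xl-base-1.0`, the CUDA device, and the pipeline setup are assumptions (the patched snippet starts from an already-constructed `pipe`, and both LoRAs referenced are SDXL adapters); loading LoRAs by `adapter_name` requires diffusers with the PEFT backend installed.

```py
import torch
from diffusers import DiffusionPipeline

# Assumed setup: an SDXL base pipeline on CUDA (the patch only shows the snippet).
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")

# Load two LoRA adapters and blend them.
pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel")
pipe.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors", adapter_name="toy")
pipe.set_adapters(["pixel", "toy"], adapter_weights=[0.5, 1.0])

# Fuse the LoRA weights into the base model and unload them, so the compiled
# graph sees plain modules with no LoRA indirection left in the forward pass.
pipe.fuse_lora()
pipe.unload_lora_weights()

# Compile only the UNet (the hot path), not the whole pipeline object.
pipe.unet.to(memory_format=torch.channels_last)
pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)

prompt = "toy_face of a hacker with a hoodie, pixel art"
image = pipe(prompt, num_inference_steps=30, generator=torch.manual_seed(0)).images[0]
```

Fusing and unloading before compiling matters because it removes the adapter layers from the compiled graph, avoiding the graph breaks and recompilations their indirection can otherwise trigger; `mode="reduce-overhead"` additionally uses CUDA graphs to cut kernel-launch overhead on repeated denoising steps.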