diff --git a/docs/assets/guides/models/03-sd-lora.png b/docs/assets/guides/models/03-sd-lora.png new file mode 100644 index 000000000..72178dce6 Binary files /dev/null and b/docs/assets/guides/models/03-sd-lora.png differ diff --git a/docs/source/tutorials/stable_diffusion.mdx b/docs/source/tutorials/stable_diffusion.mdx index 52ff5b695..b67841e29 100644 --- a/docs/source/tutorials/stable_diffusion.mdx +++ b/docs/source/tutorials/stable_diffusion.mdx @@ -469,4 +469,51 @@ Inf2 instances contain one or more Neuron devices, and each Neuron device includ + +## Load adapters + +### LoRA + +Low-Rank Adaptation (LoRA) is a fast way to adapt the styles of images generated by Stable Diffusion. In Optimum Neuron, we support using one or multiple LoRA adapters by fusing their parameters into the original parameters of the text encoder(s) and the unet during the compilation. Below is an example of compiling stable diffusion models with LoRA adapters of your choice and using the compiled artifacts to generate styled images: + +```python + +from diffusers import LCMScheduler +from optimum.neuron import NeuronStableDiffusionPipeline + + +model_id = "Lykon/dreamshaper-7" +adapter_id = "latent-consistency/lcm-lora-sdv1-5" +input_shapes = {"batch_size": 1, "height": 512, "width": 512, "num_images_per_prompt": 1} +compiler_args = {"auto_cast": "matmul", "auto_cast_type": "bf16"} + +# Compile +pipe = NeuronStableDiffusionPipeline.from_pretrained( + model_id, + export=True, + inline_weights_to_neff=True, # caveat: performance drop if neff/weights separated, will be improved by a future Neuron sdk release. 
+ lora_model_ids=adapter_id, + lora_weight_names="pytorch_lora_weights.safetensors", + lora_adapter_names="lcm", + **input_shapes, + **compiler_args, +) +pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config) + +# Save locally or upload to the HuggingFace Hub +pipe.save_pretrained("dreamshaper_7_lcm_lora_neuron/") + + +# Inference +prompt = "Self-portrait oil painting, a beautiful cyborg with golden hair, 8k" +image = pipe(prompt, num_inference_steps=4, guidance_scale=0).images[0] +``` + +stable diffusion generated image with LoRA adapter. + Are there any other stable diffusion features that you want us to support in 🤗`Optimum-neuron`? Please file an issue to [`Optimum-neuron` Github repo](https://github.com/huggingface/optimum-neuron) or discuss with us on [HuggingFace’s community forum](https://discuss.huggingface.co/c/optimum/), cheers 🤗 ! diff --git a/optimum/exporters/neuron/utils.py b/optimum/exporters/neuron/utils.py index 47574e5af..f0718d18c 100644 --- a/optimum/exporters/neuron/utils.py +++ b/optimum/exporters/neuron/utils.py @@ -287,7 +287,7 @@ def _load_lora_weights_to_pipeline( if len(lora_model_ids) == 1: pipeline.load_lora_weights(lora_model_ids[0], weight_name=weight_names[0]) # For tracing the lora weights, we need to use PEFT to fuse adapters directly into the model weights. It won't work by passing the lora scale to the Neuron pipeline during the inference. 
- pipeline.fuse_lora(lora_scale=lora_scales[0]) + pipeline.fuse_lora(lora_scale=lora_scales[0] if lora_scales else 1.0) elif len(lora_model_ids) > 1: if not len(lora_model_ids) == len(weight_names) == len(adapter_names): raise ValueError( @@ -300,6 +300,8 @@ def _load_lora_weights_to_pipeline( pipeline.set_adapters(adapter_names, adapter_weights=lora_scales) pipeline.fuse_lora() + return pipeline + def get_submodels_for_export_stable_diffusion( pipeline: Union["StableDiffusionPipeline", "StableDiffusionXLImg2ImgPipeline"], @@ -314,7 +316,7 @@ def get_submodels_for_export_stable_diffusion( """ is_sdxl = "xl" in task - _load_lora_weights_to_pipeline( + pipeline = _load_lora_weights_to_pipeline( pipeline=pipeline, lora_model_ids=lora_model_ids, weight_names=lora_weight_names, diff --git a/optimum/neuron/modeling_diffusion.py b/optimum/neuron/modeling_diffusion.py index 0e49fc810..8681afd6c 100644 --- a/optimum/neuron/modeling_diffusion.py +++ b/optimum/neuron/modeling_diffusion.py @@ -653,7 +653,7 @@ def _export( Lora model local paths or repo ids (eg. `ostris/super-cereal-sdxl-lora`) on the Hugginface Hub. lora_weight_names (`Optional[Union[str, List[str]]]`, defaults to `None`): Lora weights file names. - lora_adapter_names (`Optional[List[str]]`, defaults to `None`): + lora_adapter_names (`Optional[Union[str, List[str]]]`, defaults to `None`): Adapter names to be used for referencing the loaded adapter models. lora_scales (`Optional[List[float]]`, defaults to `None`): Lora adapters scaling factors.