diff --git a/.github/workflows/build_docker_images.yml b/.github/workflows/build_docker_images.yml index 9f4776db4315..340d8a19e17a 100644 --- a/.github/workflows/build_docker_images.yml +++ b/.github/workflows/build_docker_images.yml @@ -34,7 +34,7 @@ jobs: id: file_changes uses: jitterbit/get-changed-files@v1 with: - format: 'space-delimited' + format: "space-delimited" token: ${{ secrets.GITHUB_TOKEN }} - name: Build Changed Docker Images @@ -67,6 +67,7 @@ jobs: - diffusers-pytorch-cuda - diffusers-pytorch-compile-cuda - diffusers-pytorch-xformers-cuda + - diffusers-pytorch-minimum-cuda - diffusers-flax-cpu - diffusers-flax-tpu - diffusers-onnxruntime-cpu diff --git a/.github/workflows/nightly_tests.yml b/.github/workflows/nightly_tests.yml index 9375f760a151..fb5288c1145f 100644 --- a/.github/workflows/nightly_tests.yml +++ b/.github/workflows/nightly_tests.yml @@ -235,7 +235,64 @@ jobs: run: | pip install slack_sdk tabulate python utils/log_reports.py >> $GITHUB_STEP_SUMMARY + + torch_minimum_version_cuda_tests: + name: Torch Minimum Version CUDA Tests + runs-on: + group: aws-g4dn-2xlarge + container: + image: diffusers/diffusers-pytorch-minimum-cuda + options: --shm-size "16gb" --ipc host --gpus 0 + defaults: + run: + shell: bash + steps: + - name: Checkout diffusers + uses: actions/checkout@v3 + with: + fetch-depth: 2 + + - name: Install dependencies + run: | + python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH" + python -m uv pip install -e [quality,test] + python -m uv pip install peft@git+https://github.com/huggingface/peft.git + pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git + - name: Environment + run: | + python utils/print_env.py + + - name: Run PyTorch CUDA tests + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms + CUBLAS_WORKSPACE_CONFIG: :16:8 + run: | + python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \ + -s -v -k "not Flax and not Onnx" \ + --make-reports=tests_torch_minimum_version_cuda \ + tests/models/test_modelling_common.py \ + tests/pipelines/test_pipelines_common.py \ + tests/pipelines/test_pipeline_utils.py \ + tests/pipelines/test_pipelines.py \ + tests/pipelines/test_pipelines_auto.py \ + tests/schedulers/test_schedulers.py \ + tests/others + + - name: Failure short reports + if: ${{ failure() }} + run: | + cat reports/tests_torch_minimum_version_cuda_stats.txt + cat reports/tests_torch_minimum_version_cuda_failures_short.txt + + - name: Test suite reports artifacts + if: ${{ always() }} + uses: actions/upload-artifact@v4 + with: + name: torch_minimum_version_cuda_test_reports + path: reports + run_flax_tpu_tests: name: Nightly Flax TPU Tests runs-on: diff --git a/.github/workflows/release_tests_fast.yml b/.github/workflows/release_tests_fast.yml index a8a6f2699dca..bd0b58256d65 100644 --- a/.github/workflows/release_tests_fast.yml +++ b/.github/workflows/release_tests_fast.yml @@ -157,6 +157,63 @@ jobs: name: torch_cuda_${{ matrix.module }}_test_reports path: reports + torch_minimum_version_cuda_tests: + name: Torch Minimum Version CUDA Tests + runs-on: + group: aws-g4dn-2xlarge + container: + image: diffusers/diffusers-pytorch-minimum-cuda + options: --shm-size "16gb" --ipc host --gpus 0 + defaults: + run: + shell: bash + steps: + - name: Checkout diffusers + uses: actions/checkout@v3 + with: + fetch-depth: 2 + + - name: Install dependencies + run: | + python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH" + python -m uv pip install -e [quality,test] + python -m uv pip install peft@git+https://github.com/huggingface/peft.git + pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git + + - name: Environment + run: | + python utils/print_env.py + + - name: Run PyTorch CUDA tests + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms + CUBLAS_WORKSPACE_CONFIG: :16:8 + run: | + python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \ + -s -v -k "not Flax and not Onnx" \ + --make-reports=tests_torch_minimum_cuda \ + tests/models/test_modelling_common.py \ + tests/pipelines/test_pipelines_common.py \ + tests/pipelines/test_pipeline_utils.py \ + tests/pipelines/test_pipelines.py \ + tests/pipelines/test_pipelines_auto.py \ + tests/schedulers/test_schedulers.py \ + tests/others + + - name: Failure short reports + if: ${{ failure() }} + run: | + cat reports/tests_torch_minimum_version_cuda_stats.txt + cat reports/tests_torch_minimum_version_cuda_failures_short.txt + + - name: Test suite reports artifacts + if: ${{ always() }} + uses: actions/upload-artifact@v4 + with: + name: torch_minimum_version_cuda_test_reports + path: reports + flax_tpu_tests: name: Flax TPU Tests runs-on: docker-tpu diff --git a/docker/diffusers-pytorch-minimum-cuda/Dockerfile b/docker/diffusers-pytorch-minimum-cuda/Dockerfile new file mode 100644 index 000000000000..57ca7657acf1 --- /dev/null +++ b/docker/diffusers-pytorch-minimum-cuda/Dockerfile @@ -0,0 +1,53 @@ +FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04 +LABEL maintainer="Hugging Face" +LABEL repository="diffusers" + +ENV DEBIAN_FRONTEND=noninteractive +ENV MINIMUM_SUPPORTED_TORCH_VERSION="2.1.0" +ENV MINIMUM_SUPPORTED_TORCHVISION_VERSION="0.16.0" +ENV MINIMUM_SUPPORTED_TORCHAUDIO_VERSION="2.1.0" + +RUN apt-get -y update \ + && apt-get install -y software-properties-common \ + && add-apt-repository ppa:deadsnakes/ppa + +RUN apt install -y bash \ + build-essential \ + git \ + git-lfs \ + curl \ + ca-certificates \ + libsndfile1-dev \ + libgl1 \ + python3.10 \ + python3.10-dev \ + python3-pip \ + python3.10-venv && \ + rm -rf /var/lib/apt/lists + +# make sure to use venv +RUN python3.10 -m venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" + +# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py) +RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \ + python3.10 -m uv pip install --no-cache-dir \ + torch==$MINIMUM_SUPPORTED_TORCH_VERSION \ + torchvision==$MINIMUM_SUPPORTED_TORCHVISION_VERSION \ + torchaudio==$MINIMUM_SUPPORTED_TORCHAUDIO_VERSION \ + invisible_watermark && \ + python3.10 -m pip install --no-cache-dir \ + accelerate \ + datasets \ + hf-doc-builder \ + huggingface-hub \ + hf_transfer \ + Jinja2 \ + librosa \ + numpy==1.26.4 \ + scipy \ + tensorboard \ + transformers \ + hf_transfer + +CMD ["/bin/bash"] diff --git a/docs/source/en/api/pipelines/allegro.md b/docs/source/en/api/pipelines/allegro.md index dc9b368c9465..690f8096a0e4 100644 --- a/docs/source/en/api/pipelines/allegro.md +++ b/docs/source/en/api/pipelines/allegro.md @@ -19,7 +19,7 @@ The abstract from the paper is: -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading.md#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/animatediff.md b/docs/source/en/api/pipelines/animatediff.md index 735901280362..fca72e953625 100644 --- a/docs/source/en/api/pipelines/animatediff.md +++ b/docs/source/en/api/pipelines/animatediff.md @@ -803,7 +803,7 @@ FreeInit is not really free - the improved quality comes at the cost of extra co -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/attend_and_excite.md b/docs/source/en/api/pipelines/attend_and_excite.md index fd8dd95fa1c3..953ab1bb7288 100644 --- a/docs/source/en/api/pipelines/attend_and_excite.md +++ b/docs/source/en/api/pipelines/attend_and_excite.md @@ -22,7 +22,7 @@ You can find additional information about Attend-and-Excite on the [project page -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/audioldm.md b/docs/source/en/api/pipelines/audioldm.md index 95d41b9569f5..02fe2c779eee 100644 --- a/docs/source/en/api/pipelines/audioldm.md +++ b/docs/source/en/api/pipelines/audioldm.md @@ -37,7 +37,7 @@ During inference: -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/audioldm2.md b/docs/source/en/api/pipelines/audioldm2.md index 9f2b7529d4bc..debd2c3433e4 100644 --- a/docs/source/en/api/pipelines/audioldm2.md +++ b/docs/source/en/api/pipelines/audioldm2.md @@ -60,7 +60,7 @@ The following example demonstrates how to construct good music and speech genera -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/blip_diffusion.md b/docs/source/en/api/pipelines/blip_diffusion.md index b4504f6d6b19..15d17da8f07c 100644 --- a/docs/source/en/api/pipelines/blip_diffusion.md +++ b/docs/source/en/api/pipelines/blip_diffusion.md @@ -25,7 +25,7 @@ The original codebase can be found at [salesforce/LAVIS](https://github.com/sale -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/cogvideox.md b/docs/source/en/api/pipelines/cogvideox.md index eaae8ab795ce..dec48d8b3593 100644 --- a/docs/source/en/api/pipelines/cogvideox.md +++ b/docs/source/en/api/pipelines/cogvideox.md @@ -23,7 +23,7 @@ The abstract from the paper is: -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading.md#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/cogview3.md b/docs/source/en/api/pipelines/cogview3.md index 025da9cba9aa..277edca4cf33 100644 --- a/docs/source/en/api/pipelines/cogview3.md +++ b/docs/source/en/api/pipelines/cogview3.md @@ -23,7 +23,7 @@ The abstract from the paper is: -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading.md#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/controlnet.md b/docs/source/en/api/pipelines/controlnet.md index 6b00902cf296..e9bbb32cedb4 100644 --- a/docs/source/en/api/pipelines/controlnet.md +++ b/docs/source/en/api/pipelines/controlnet.md @@ -26,7 +26,7 @@ The original codebase can be found at [lllyasviel/ControlNet](https://github.com -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/controlnet_flux.md b/docs/source/en/api/pipelines/controlnet_flux.md index 82454ae5e930..c4dc0b9ff3c3 100644 --- a/docs/source/en/api/pipelines/controlnet_flux.md +++ b/docs/source/en/api/pipelines/controlnet_flux.md @@ -42,7 +42,7 @@ XLabs ControlNets are also supported, which was contributed by the [XLabs team]( -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/controlnet_hunyuandit.md b/docs/source/en/api/pipelines/controlnet_hunyuandit.md index e702eb30b8b0..6776b88ab35f 100644 --- a/docs/source/en/api/pipelines/controlnet_hunyuandit.md +++ b/docs/source/en/api/pipelines/controlnet_hunyuandit.md @@ -26,7 +26,7 @@ This code is implemented by Tencent Hunyuan Team. You can find pre-trained check -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/controlnet_sd3.md b/docs/source/en/api/pipelines/controlnet_sd3.md index 20bc6cc9abfc..aa28cfe345c8 100644 --- a/docs/source/en/api/pipelines/controlnet_sd3.md +++ b/docs/source/en/api/pipelines/controlnet_sd3.md @@ -36,7 +36,7 @@ This controlnet code is mainly implemented by [The InstantX Team](https://huggin -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/controlnet_sdxl.md b/docs/source/en/api/pipelines/controlnet_sdxl.md index 2de7cbff6ebc..4fb32118abf8 100644 --- a/docs/source/en/api/pipelines/controlnet_sdxl.md +++ b/docs/source/en/api/pipelines/controlnet_sdxl.md @@ -32,7 +32,7 @@ If you don't see a checkpoint you're interested in, you can train your own SDXL -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/controlnetxs.md b/docs/source/en/api/pipelines/controlnetxs.md index 2d4ae7b8ce46..4da517f41b75 100644 --- a/docs/source/en/api/pipelines/controlnetxs.md +++ b/docs/source/en/api/pipelines/controlnetxs.md @@ -26,7 +26,7 @@ This model was contributed by [UmerHA](https://twitter.com/UmerHAdil). ❤️ -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/controlnetxs_sdxl.md b/docs/source/en/api/pipelines/controlnetxs_sdxl.md index 31075c0ef96a..0862a5d79878 100644 --- a/docs/source/en/api/pipelines/controlnetxs_sdxl.md +++ b/docs/source/en/api/pipelines/controlnetxs_sdxl.md @@ -32,7 +32,7 @@ This model was contributed by [UmerHA](https://twitter.com/UmerHAdil). ❤️ -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/dance_diffusion.md b/docs/source/en/api/pipelines/dance_diffusion.md index efba3c3763a4..9b6e7b66e198 100644 --- a/docs/source/en/api/pipelines/dance_diffusion.md +++ b/docs/source/en/api/pipelines/dance_diffusion.md @@ -19,7 +19,7 @@ Dance Diffusion is the first in a suite of generative audio tools for producers -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/ddpm.md b/docs/source/en/api/pipelines/ddpm.md index 81ddb5e0c051..0935f0bec79c 100644 --- a/docs/source/en/api/pipelines/ddpm.md +++ b/docs/source/en/api/pipelines/ddpm.md @@ -22,7 +22,7 @@ The original codebase can be found at [hohonathanho/diffusion](https://github.co -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/dit.md b/docs/source/en/api/pipelines/dit.md index 1d04458d9cb9..2ee45b631c77 100644 --- a/docs/source/en/api/pipelines/dit.md +++ b/docs/source/en/api/pipelines/dit.md @@ -22,7 +22,7 @@ The original codebase can be found at [facebookresearch/dit](https://github.com/ -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/flux.md b/docs/source/en/api/pipelines/flux.md index 1c6989a5e659..fd2c07e59f3f 100644 --- a/docs/source/en/api/pipelines/flux.md +++ b/docs/source/en/api/pipelines/flux.md @@ -305,6 +305,10 @@ image = control_pipe( image.save("output.png") ``` +## Note about `unload_lora_weights()` when using Flux LoRAs + +When unloading the Control LoRA weights, call `pipe.unload_lora_weights(reset_to_overwritten_params=True)` to reset the `pipe.transformer` completely back to its original form. The resultant pipeline can then be used with methods like [`DiffusionPipeline.from_pipe`]. More details about this argument are available in [this PR](https://github.com/huggingface/diffusers/pull/10397). + ## Running FP16 inference Flux can generate high-quality images with FP16 (i.e. to accelerate inference on Turing/Volta GPUs) but produces different outputs compared to FP32/BF16. The issue is that some activations in the text encoders have to be clipped when running in FP16, which affects the overall image. Forcing text encoders to run with FP32 inference thus removes this output difference. See [here](https://github.com/huggingface/diffusers/pull/9097#issuecomment-2272292516) for details. diff --git a/docs/source/en/api/pipelines/hunyuan_video.md b/docs/source/en/api/pipelines/hunyuan_video.md index 2351fcf0aa8f..df43c7f8568d 100644 --- a/docs/source/en/api/pipelines/hunyuan_video.md +++ b/docs/source/en/api/pipelines/hunyuan_video.md @@ -20,7 +20,7 @@ -Make sure to check out the Schedulers [guide](https://huggingface.co/docs/diffusers/main/en/using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](https://huggingface.co/docs/diffusers/main/en/using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/hunyuandit.md b/docs/source/en/api/pipelines/hunyuandit.md index 53053ffe3b6a..d593259a09ed 100644 --- a/docs/source/en/api/pipelines/hunyuandit.md +++ b/docs/source/en/api/pipelines/hunyuandit.md @@ -30,7 +30,7 @@ HunyuanDiT has the following components: -Make sure to check out the Schedulers [guide](https://huggingface.co/docs/diffusers/main/en/using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](https://huggingface.co/docs/diffusers/main/en/using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/i2vgenxl.md b/docs/source/en/api/pipelines/i2vgenxl.md index cbb6be1176fd..3994f91d2cd0 100644 --- a/docs/source/en/api/pipelines/i2vgenxl.md +++ b/docs/source/en/api/pipelines/i2vgenxl.md @@ -22,7 +22,7 @@ The original codebase can be found [here](https://github.com/ali-vilab/i2vgen-xl -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. Also, to know more about reducing the memory usage of this pipeline, refer to the ["Reduce memory usage"] section [here](../../using-diffusers/svd#reduce-memory-usage). +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. Also, to know more about reducing the memory usage of this pipeline, refer to the ["Reduce memory usage"] section [here](../../using-diffusers/svd#reduce-memory-usage). diff --git a/docs/source/en/api/pipelines/kandinsky.md b/docs/source/en/api/pipelines/kandinsky.md index 9ea3cd4a1718..72cbf3fb474d 100644 --- a/docs/source/en/api/pipelines/kandinsky.md +++ b/docs/source/en/api/pipelines/kandinsky.md @@ -25,7 +25,7 @@ Check out the [Kandinsky Community](https://huggingface.co/kandinsky-community) -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/kandinsky3.md b/docs/source/en/api/pipelines/kandinsky3.md index 96123846af32..a58932aa661b 100644 --- a/docs/source/en/api/pipelines/kandinsky3.md +++ b/docs/source/en/api/pipelines/kandinsky3.md @@ -32,7 +32,7 @@ Check out the [Kandinsky Community](https://huggingface.co/kandinsky-community) -Make sure to check out the schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/kandinsky_v22.md b/docs/source/en/api/pipelines/kandinsky_v22.md index 13a6ca81d4a5..f097a085ef7f 100644 --- a/docs/source/en/api/pipelines/kandinsky_v22.md +++ b/docs/source/en/api/pipelines/kandinsky_v22.md @@ -25,7 +25,7 @@ Check out the [Kandinsky Community](https://huggingface.co/kandinsky-community) -Make sure to check out the schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/latent_diffusion.md b/docs/source/en/api/pipelines/latent_diffusion.md index ab50faebbfba..e5cc7c1ab069 100644 --- a/docs/source/en/api/pipelines/latent_diffusion.md +++ b/docs/source/en/api/pipelines/latent_diffusion.md @@ -22,7 +22,7 @@ The original codebase can be found at [CompVis/latent-diffusion](https://github. -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/latte.md b/docs/source/en/api/pipelines/latte.md index d31ed0b4ed61..26e087442cdc 100644 --- a/docs/source/en/api/pipelines/latte.md +++ b/docs/source/en/api/pipelines/latte.md @@ -28,7 +28,7 @@ This pipeline was contributed by [maxin-cn](https://github.com/maxin-cn). The or -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading.md#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/ltx_video.md b/docs/source/en/api/pipelines/ltx_video.md index df400d8051a6..21096df5c2ab 100644 --- a/docs/source/en/api/pipelines/ltx_video.md +++ b/docs/source/en/api/pipelines/ltx_video.md @@ -18,7 +18,7 @@ -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading.md#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/lumina.md b/docs/source/en/api/pipelines/lumina.md index 2458b1f815d9..1967e85f173a 100644 --- a/docs/source/en/api/pipelines/lumina.md +++ b/docs/source/en/api/pipelines/lumina.md @@ -47,7 +47,7 @@ This pipeline was contributed by [PommesPeter](https://github.com/PommesPeter). -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading.md#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/marigold.md b/docs/source/en/api/pipelines/marigold.md index 374947ce95ab..93ca39e77b9c 100644 --- a/docs/source/en/api/pipelines/marigold.md +++ b/docs/source/en/api/pipelines/marigold.md @@ -43,7 +43,7 @@ The original checkpoints can be found under the [PRS-ETH](https://huggingface.co -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. Also, to know more about reducing the memory usage of this pipeline, refer to the ["Reduce memory usage"] section [here](../../using-diffusers/svd#reduce-memory-usage). +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. Also, to know more about reducing the memory usage of this pipeline, refer to the ["Reduce memory usage"] section [here](../../using-diffusers/svd#reduce-memory-usage). diff --git a/docs/source/en/api/pipelines/musicldm.md b/docs/source/en/api/pipelines/musicldm.md index 3ffb6541405d..412e8e41c2ca 100644 --- a/docs/source/en/api/pipelines/musicldm.md +++ b/docs/source/en/api/pipelines/musicldm.md @@ -42,7 +42,7 @@ During inference: -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/paint_by_example.md b/docs/source/en/api/pipelines/paint_by_example.md index effd608873fd..75360596d676 100644 --- a/docs/source/en/api/pipelines/paint_by_example.md +++ b/docs/source/en/api/pipelines/paint_by_example.md @@ -26,7 +26,7 @@ Paint by Example is supported by the official [Fantasy-Studio/Paint-by-Example]( -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/panorama.md b/docs/source/en/api/pipelines/panorama.md index b34008ad830f..7633ed10bb95 100644 --- a/docs/source/en/api/pipelines/panorama.md +++ b/docs/source/en/api/pipelines/panorama.md @@ -37,7 +37,7 @@ But with circular padding, the right and the left parts are matching (`circular_ -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/pix2pix.md b/docs/source/en/api/pipelines/pix2pix.md index 52767a90b214..53f46d47773a 100644 --- a/docs/source/en/api/pipelines/pix2pix.md +++ b/docs/source/en/api/pipelines/pix2pix.md @@ -22,7 +22,7 @@ You can find additional information about InstructPix2Pix on the [project page]( -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/pixart.md b/docs/source/en/api/pipelines/pixart.md index 296f92ad07e9..d4e268b81d49 100644 --- a/docs/source/en/api/pipelines/pixart.md +++ b/docs/source/en/api/pipelines/pixart.md @@ -31,7 +31,7 @@ Some notes about this pipeline: -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading.md#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/sana.md b/docs/source/en/api/pipelines/sana.md index dab4822cf286..50eb79088c80 100644 --- a/docs/source/en/api/pipelines/sana.md +++ b/docs/source/en/api/pipelines/sana.md @@ -22,7 +22,7 @@ The abstract from the paper is: -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading.md#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/self_attention_guidance.md b/docs/source/en/api/pipelines/self_attention_guidance.md index e56aae2a775b..d656ce93f104 100644 --- a/docs/source/en/api/pipelines/self_attention_guidance.md +++ b/docs/source/en/api/pipelines/self_attention_guidance.md @@ -22,7 +22,7 @@ You can find additional information about Self-Attention Guidance on the [projec -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/semantic_stable_diffusion.md b/docs/source/en/api/pipelines/semantic_stable_diffusion.md index 19a0a8116989..b9aacd3518d8 100644 --- a/docs/source/en/api/pipelines/semantic_stable_diffusion.md +++ b/docs/source/en/api/pipelines/semantic_stable_diffusion.md @@ -21,7 +21,7 @@ The abstract from the paper is: -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/shap_e.md b/docs/source/en/api/pipelines/shap_e.md index 9f9155c79e89..3c1f939c1fce 100644 --- a/docs/source/en/api/pipelines/shap_e.md +++ b/docs/source/en/api/pipelines/shap_e.md @@ -19,7 +19,7 @@ The original codebase can be found at [openai/shap-e](https://github.com/openai/ -See the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +See the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/stable_unclip.md b/docs/source/en/api/pipelines/stable_unclip.md index 3067ba91f752..ab0b73911920 100644 --- a/docs/source/en/api/pipelines/stable_unclip.md +++ b/docs/source/en/api/pipelines/stable_unclip.md @@ -97,7 +97,7 @@ image -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/text_to_video.md b/docs/source/en/api/pipelines/text_to_video.md index 7522264e0b58..987582ed676d 100644 --- a/docs/source/en/api/pipelines/text_to_video.md +++ b/docs/source/en/api/pipelines/text_to_video.md @@ -175,7 +175,7 @@ Check out the [Text or image-to-video](text-img2vid) guide for more details abou -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/text_to_video_zero.md b/docs/source/en/api/pipelines/text_to_video_zero.md index c6bf30fed7af..93219b5f3b71 100644 --- a/docs/source/en/api/pipelines/text_to_video_zero.md +++ b/docs/source/en/api/pipelines/text_to_video_zero.md @@ -284,7 +284,7 @@ You can filter out some available DreamBooth-trained models with [this link](htt -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/unclip.md b/docs/source/en/api/pipelines/unclip.md index f379ffd63f53..943cebdb28a2 100644 --- a/docs/source/en/api/pipelines/unclip.md +++ b/docs/source/en/api/pipelines/unclip.md @@ -19,7 +19,7 @@ You can find lucidrains' DALL-E 2 recreation at [lucidrains/DALLE2-pytorch](http -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/unidiffuser.md b/docs/source/en/api/pipelines/unidiffuser.md index 553a6d300152..9ae62b51fc98 100644 --- a/docs/source/en/api/pipelines/unidiffuser.md +++ b/docs/source/en/api/pipelines/unidiffuser.md @@ -192,7 +192,7 @@ print(final_prompt) -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/docs/source/en/api/pipelines/value_guided_sampling.md b/docs/source/en/api/pipelines/value_guided_sampling.md index d21dbf04d7ee..5aaee9090cef 100644 --- a/docs/source/en/api/pipelines/value_guided_sampling.md +++ b/docs/source/en/api/pipelines/value_guided_sampling.md @@ -30,7 +30,7 @@ The script to run the model is available [here](https://github.com/huggingface/d -Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-components-across-pipelines) section to learn how to efficiently load the same components into multiple pipelines. +Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines. diff --git a/examples/advanced_diffusion_training/train_dreambooth_lora_sd15_advanced.py b/examples/advanced_diffusion_training/train_dreambooth_lora_sd15_advanced.py index 542b8505874f..923683ae7c38 100644 --- a/examples/advanced_diffusion_training/train_dreambooth_lora_sd15_advanced.py +++ b/examples/advanced_diffusion_training/train_dreambooth_lora_sd15_advanced.py @@ -160,7 +160,7 @@ def save_model_card( from diffusers import AutoPipelineForText2Image import torch {diffusers_imports_pivotal} -pipeline = AutoPipelineForText2Image.from_pretrained('runwayml/stable-diffusion-v1-5', torch_dtype=torch.float16).to('cuda') +pipeline = AutoPipelineForText2Image.from_pretrained('stable-diffusion-v1-5/stable-diffusion-v1-5', torch_dtype=torch.float16).to('cuda') pipeline.load_lora_weights('{repo_id}', weight_name='pytorch_lora_weights.safetensors') {diffusers_example_pivotal} image = pipeline('{validation_prompt if validation_prompt else instance_prompt}').images[0] diff --git a/examples/community/adaptive_mask_inpainting.py b/examples/community/adaptive_mask_inpainting.py index a9de26b29a89..5e74f6c1127d 100644 --- a/examples/community/adaptive_mask_inpainting.py +++ b/examples/community/adaptive_mask_inpainting.py @@ -372,7 +372,7 @@ def __init__( self.register_adaptive_mask_model() self.register_adaptive_mask_settings() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -386,7 +386,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "skip_prk_steps") and scheduler.config.skip_prk_steps is False: + if scheduler is not None and getattr(scheduler.config, "skip_prk_steps", True) is False: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration" " `skip_prk_steps`. `skip_prk_steps` should be set to True in the configuration file. Please make" diff --git a/examples/community/composable_stable_diffusion.py b/examples/community/composable_stable_diffusion.py index 46d12ba1f2aa..da6c1d2356be 100644 --- a/examples/community/composable_stable_diffusion.py +++ b/examples/community/composable_stable_diffusion.py @@ -89,7 +89,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -103,7 +103,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" diff --git a/examples/community/img2img_inpainting.py b/examples/community/img2img_inpainting.py index 4dfb7a39155f..292c9aa2bc47 100644 --- a/examples/community/img2img_inpainting.py +++ b/examples/community/img2img_inpainting.py @@ -95,7 +95,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " diff --git a/examples/community/instaflow_one_step.py b/examples/community/instaflow_one_step.py index 3fef02287186..1fac74b3c5a5 100644 --- a/examples/community/instaflow_one_step.py +++ b/examples/community/instaflow_one_step.py @@ -109,7 +109,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -123,7 +123,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" diff --git a/examples/community/interpolate_stable_diffusion.py b/examples/community/interpolate_stable_diffusion.py index 52b2707f33f7..99614635ee13 100644 --- a/examples/community/interpolate_stable_diffusion.py +++ b/examples/community/interpolate_stable_diffusion.py @@ -86,7 +86,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " diff --git a/examples/community/ip_adapter_face_id.py b/examples/community/ip_adapter_face_id.py index c7dc775eeee3..e05a27abb281 100644 --- a/examples/community/ip_adapter_face_id.py +++ b/examples/community/ip_adapter_face_id.py @@ -191,7 +191,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -205,7 +205,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" diff --git a/examples/community/llm_grounded_diffusion.py b/examples/community/llm_grounded_diffusion.py index 49c074911354..9c2cf984f14b 100644 --- a/examples/community/llm_grounded_diffusion.py +++ b/examples/community/llm_grounded_diffusion.py @@ -336,7 +336,7 @@ def __init__( # This is copied from StableDiffusionPipeline, with hook initizations for LMD+. super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -350,7 +350,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" diff --git a/examples/community/lpw_stable_diffusion.py b/examples/community/lpw_stable_diffusion.py index ec27acdce331..4e9c5d1f6a40 100644 --- a/examples/community/lpw_stable_diffusion.py +++ b/examples/community/lpw_stable_diffusion.py @@ -496,7 +496,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -510,7 +510,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" diff --git a/examples/community/matryoshka.py b/examples/community/matryoshka.py index 0c85ad118752..0cd85ced59a1 100644 --- a/examples/community/matryoshka.py +++ b/examples/community/matryoshka.py @@ -3766,7 +3766,7 @@ def __init__( else: raise ValueError("Currently, nesting levels 0, 1, and 2 are supported.") - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -3780,7 +3780,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - # if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + # if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: # deprecation_message = ( # f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." # " `clip_sample` should be set to False in the configuration file. Please make sure to update the" diff --git a/examples/community/multilingual_stable_diffusion.py b/examples/community/multilingual_stable_diffusion.py index dc335e0b585e..5dcc75c9e20b 100644 --- a/examples/community/multilingual_stable_diffusion.py +++ b/examples/community/multilingual_stable_diffusion.py @@ -98,7 +98,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " diff --git a/examples/community/pipeline_prompt2prompt.py b/examples/community/pipeline_prompt2prompt.py index 508e84177928..3a193fb5bc9c 100644 --- a/examples/community/pipeline_prompt2prompt.py +++ b/examples/community/pipeline_prompt2prompt.py @@ -131,7 +131,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -145,7 +145,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" diff --git a/examples/community/pipeline_stable_diffusion_boxdiff.py b/examples/community/pipeline_stable_diffusion_boxdiff.py index 6490c1400138..fe32ae7db7e4 100644 --- a/examples/community/pipeline_stable_diffusion_boxdiff.py +++ b/examples/community/pipeline_stable_diffusion_boxdiff.py @@ -417,7 +417,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -431,7 +431,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" diff --git a/examples/community/pipeline_stable_diffusion_pag.py b/examples/community/pipeline_stable_diffusion_pag.py index cea2c9735747..12a40d44aaec 100644 --- a/examples/community/pipeline_stable_diffusion_pag.py +++ b/examples/community/pipeline_stable_diffusion_pag.py @@ -384,7 +384,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -398,7 +398,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" diff --git a/examples/community/pipeline_zero1to3.py b/examples/community/pipeline_zero1to3.py index 95bb37ce02b7..0f7fdf627136 100644 --- a/examples/community/pipeline_zero1to3.py +++ b/examples/community/pipeline_zero1to3.py @@ -108,7 +108,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -122,7 +122,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" diff --git a/examples/community/stable_diffusion_ipex.py b/examples/community/stable_diffusion_ipex.py index 123892f6229a..ecd38ce345c5 100644 --- a/examples/community/stable_diffusion_ipex.py +++ b/examples/community/stable_diffusion_ipex.py @@ -105,7 +105,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -119,7 +119,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" diff --git a/examples/community/stable_diffusion_mega.py b/examples/community/stable_diffusion_mega.py index 95b4b03e4de1..77e5011d2a70 100644 --- a/examples/community/stable_diffusion_mega.py +++ b/examples/community/stable_diffusion_mega.py @@ -66,7 +66,7 @@ def __init__( requires_safety_checker: bool = True, ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " diff --git a/examples/community/stable_diffusion_reference.py b/examples/community/stable_diffusion_reference.py index efb0fa89dbfc..1c705f5c768e 100644 --- a/examples/community/stable_diffusion_reference.py +++ b/examples/community/stable_diffusion_reference.py @@ -132,7 +132,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -146,7 +146,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "skip_prk_steps") and scheduler.config.skip_prk_steps is False: + if scheduler is not None and getattr(scheduler.config, "skip_prk_steps", True) is False: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration" " `skip_prk_steps`. `skip_prk_steps` should be set to True in the configuration file. Please make" diff --git a/examples/community/stable_diffusion_repaint.py b/examples/community/stable_diffusion_repaint.py index 980e9a155997..a2b221b84969 100644 --- a/examples/community/stable_diffusion_repaint.py +++ b/examples/community/stable_diffusion_repaint.py @@ -187,7 +187,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -201,7 +201,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "skip_prk_steps") and scheduler.config.skip_prk_steps is False: + if scheduler is not None and getattr(scheduler.config, "skip_prk_steps", True) is False: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration" " `skip_prk_steps`. `skip_prk_steps` should be set to True in the configuration file. Please make" diff --git a/examples/community/stable_diffusion_tensorrt_img2img.py b/examples/community/stable_diffusion_tensorrt_img2img.py index 91540d1f4159..87a9d7cb84ec 100755 --- a/examples/community/stable_diffusion_tensorrt_img2img.py +++ b/examples/community/stable_diffusion_tensorrt_img2img.py @@ -710,7 +710,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -724,7 +724,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" diff --git a/examples/community/stable_diffusion_tensorrt_inpaint.py b/examples/community/stable_diffusion_tensorrt_inpaint.py index b6f6711a53e7..d6b1331adac1 100755 --- a/examples/community/stable_diffusion_tensorrt_inpaint.py +++ b/examples/community/stable_diffusion_tensorrt_inpaint.py @@ -714,7 +714,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -728,7 +728,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" diff --git a/examples/community/stable_diffusion_tensorrt_txt2img.py b/examples/community/stable_diffusion_tensorrt_txt2img.py index f8761053ed1a..b008b3bae944 100755 --- a/examples/community/stable_diffusion_tensorrt_txt2img.py +++ b/examples/community/stable_diffusion_tensorrt_txt2img.py @@ -626,7 +626,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -640,7 +640,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" diff --git a/examples/community/text_inpainting.py b/examples/community/text_inpainting.py index c4378ab96f28..d73082b6cf38 100644 --- a/examples/community/text_inpainting.py +++ b/examples/community/text_inpainting.py @@ -71,7 +71,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -85,7 +85,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "skip_prk_steps") and scheduler.config.skip_prk_steps is False: + if scheduler is not None and getattr(scheduler.config, "skip_prk_steps", True) is False: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration" " `skip_prk_steps`. `skip_prk_steps` should be set to True in the configuration file. Please make" diff --git a/examples/community/wildcard_stable_diffusion.py b/examples/community/wildcard_stable_diffusion.py index c866ce2ae904..3c42c54f71f8 100644 --- a/examples/community/wildcard_stable_diffusion.py +++ b/examples/community/wildcard_stable_diffusion.py @@ -120,7 +120,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " diff --git a/examples/flux-control/README.md b/examples/flux-control/README.md index 26ad9d06a2af..14afa499db0d 100644 --- a/examples/flux-control/README.md +++ b/examples/flux-control/README.md @@ -121,7 +121,7 @@ prompt = "A couple, 4k photo, highly detailed" gen_images = pipe( prompt=prompt, - condition_image=image, + control_image=image, num_inference_steps=50, joint_attention_kwargs={"scale": 0.9}, guidance_scale=25., @@ -190,7 +190,7 @@ prompt = "A couple, 4k photo, highly detailed" gen_images = pipe( prompt=prompt, - condition_image=image, + control_image=image, num_inference_steps=50, guidance_scale=25., ).images[0] @@ -200,5 +200,5 @@ gen_images.save("output.png") ## Things to note * The scripts provided in this directory are experimental and educational. This means we may have to tweak things around to get good results on a given condition. We believe this is best done with the community 🤗 -* The scripts are not memory-optimized but we offload the VAE and the text encoders to CPU when they are not used. +* The scripts are not memory-optimized but we offload the VAE and the text encoders to CPU when they are not used if `--offload` is specified. * We can extract LoRAs from the fully fine-tuned model. While we currently don't provide any utilities for that, users are welcome to refer to [this script](https://github.com/Stability-AI/stability-ComfyUI-nodes/blob/master/control_lora_create.py) that provides a similar functionality. \ No newline at end of file diff --git a/examples/flux-control/train_control_flux.py b/examples/flux-control/train_control_flux.py index 35f9a5f80342..7d0e28069054 100644 --- a/examples/flux-control/train_control_flux.py +++ b/examples/flux-control/train_control_flux.py @@ -122,7 +122,6 @@ def log_validation(flux_transformer, args, accelerator, weight_dtype, step, is_f for _ in range(args.num_validation_images): with autocast_ctx: - # need to fix in pipeline_flux_controlnet image = pipeline( prompt=validation_prompt, control_image=validation_image, @@ -159,7 +158,7 @@ def log_validation(flux_transformer, args, accelerator, weight_dtype, step, is_f images = log["images"] validation_prompt = log["validation_prompt"] validation_image = log["validation_image"] - formatted_images.append(wandb.Image(validation_image, caption="Controlnet conditioning")) + formatted_images.append(wandb.Image(validation_image, caption="Conditioning")) for image in images: image = wandb.Image(image, caption=validation_prompt) formatted_images.append(image) @@ -188,7 +187,7 @@ def save_model_card(repo_id: str, image_logs=None, base_model=str, repo_folder=N img_str += f"![images_{i})](./images_{i}.png)\n" model_description = f""" -# control-lora-{repo_id} +# flux-control-{repo_id} These are Control weights trained on {base_model} with new type of conditioning. {img_str} @@ -434,7 +433,7 @@ def parse_args(input_args=None): "--conditioning_image_column", type=str, default="conditioning_image", - help="The column of the dataset containing the controlnet conditioning image.", + help="The column of the dataset containing the control conditioning image.", ) parser.add_argument( "--caption_column", @@ -442,6 +441,7 @@ def parse_args(input_args=None): default="text", help="The column of the dataset containing a caption or a list of captions.", ) + parser.add_argument("--log_dataset_samples", action="store_true", help="Whether to log somple dataset samples.") parser.add_argument( "--max_train_samples", type=int, @@ -468,7 +468,7 @@ def parse_args(input_args=None): default=None, nargs="+", help=( - "A set of paths to the controlnet conditioning image be evaluated every `--validation_steps`" + "A set of paths to the control conditioning image be evaluated every `--validation_steps`" " and logged to `--report_to`. Provide either a matching number of `--validation_prompt`s, a" " a single `--validation_prompt` to be used with all `--validation_image`s, or a single" " `--validation_image` that will be used with all `--validation_prompt`s." @@ -505,7 +505,11 @@ def parse_args(input_args=None): default=None, help="Path to the jsonl file containing the training data.", ) - + parser.add_argument( + "--only_target_transformer_blocks", + action="store_true", + help="If we should only target the transformer blocks to train along with the input layer (`x_embedder`).", + ) parser.add_argument( "--guidance_scale", type=float, @@ -581,7 +585,7 @@ def parse_args(input_args=None): if args.resolution % 8 != 0: raise ValueError( - "`--resolution` must be divisible by 8 for consistently sized encoded images between the VAE and the controlnet encoder." + "`--resolution` must be divisible by 8 for consistently sized encoded images between the VAE and the Flux transformer." ) return args @@ -665,7 +669,12 @@ def preprocess_train(examples): conditioning_images = [image_transforms(image) for image in conditioning_images] examples["pixel_values"] = images examples["conditioning_pixel_values"] = conditioning_images - examples["captions"] = list(examples[args.caption_column]) + + is_caption_list = isinstance(examples[args.caption_column][0], list) + if is_caption_list: + examples["captions"] = [max(example, key=len) for example in examples[args.caption_column]] + else: + examples["captions"] = list(examples[args.caption_column]) return examples @@ -765,7 +774,8 @@ def main(args): subfolder="scheduler", ) noise_scheduler_copy = copy.deepcopy(noise_scheduler) - flux_transformer.requires_grad_(True) + if not args.only_target_transformer_blocks: + flux_transformer.requires_grad_(True) vae.requires_grad_(False) # cast down and move to the CPU @@ -797,6 +807,12 @@ def main(args): assert torch.all(flux_transformer.x_embedder.weight[:, initial_input_channels:].data == 0) flux_transformer.register_to_config(in_channels=initial_input_channels * 2, out_channels=initial_input_channels) + if args.only_target_transformer_blocks: + flux_transformer.x_embedder.requires_grad_(True) + for name, module in flux_transformer.named_modules(): + if "transformer_blocks" in name: + module.requires_grad_(True) + def unwrap_model(model): model = accelerator.unwrap_model(model) model = model._orig_mod if is_compiled_module(model) else model @@ -974,6 +990,32 @@ def load_model_hook(models, input_dir): else: initial_global_step = 0 + if accelerator.is_main_process and args.report_to == "wandb" and args.log_dataset_samples: + logger.info("Logging some dataset samples.") + formatted_images = [] + formatted_control_images = [] + all_prompts = [] + for i, batch in enumerate(train_dataloader): + images = (batch["pixel_values"] + 1) / 2 + control_images = (batch["conditioning_pixel_values"] + 1) / 2 + prompts = batch["captions"] + + if len(formatted_images) > 10: + break + + for img, control_img, prompt in zip(images, control_images, prompts): + formatted_images.append(img) + formatted_control_images.append(control_img) + all_prompts.append(prompt) + + logged_artifacts = [] + for img, control_img, prompt in zip(formatted_images, formatted_control_images, all_prompts): + logged_artifacts.append(wandb.Image(control_img, caption="Conditioning")) + logged_artifacts.append(wandb.Image(img, caption=prompt)) + + wandb_tracker = [tracker for tracker in accelerator.trackers if tracker.name == "wandb"] + wandb_tracker[0].log({"dataset_samples": logged_artifacts}) + progress_bar = tqdm( range(0, args.max_train_steps), initial=initial_global_step, diff --git a/examples/flux-control/train_control_lora_flux.py b/examples/flux-control/train_control_lora_flux.py index 99a05d54832f..44c684395849 100644 --- a/examples/flux-control/train_control_lora_flux.py +++ b/examples/flux-control/train_control_lora_flux.py @@ -132,7 +132,6 @@ def log_validation(flux_transformer, args, accelerator, weight_dtype, step, is_f for _ in range(args.num_validation_images): with autocast_ctx: - # need to fix in pipeline_flux_controlnet image = pipeline( prompt=validation_prompt, control_image=validation_image, @@ -169,7 +168,7 @@ def log_validation(flux_transformer, args, accelerator, weight_dtype, step, is_f images = log["images"] validation_prompt = log["validation_prompt"] validation_image = log["validation_image"] - formatted_images.append(wandb.Image(validation_image, caption="Controlnet conditioning")) + formatted_images.append(wandb.Image(validation_image, caption="Conditioning")) for image in images: image = wandb.Image(image, caption=validation_prompt) formatted_images.append(image) @@ -198,7 +197,7 @@ def save_model_card(repo_id: str, image_logs=None, base_model=str, repo_folder=N img_str += f"![images_{i})](./images_{i}.png)\n" model_description = f""" -# controlnet-lora-{repo_id} +# control-lora-{repo_id} These are Control LoRA weights trained on {base_model} with new type of conditioning. {img_str} @@ -256,7 +255,7 @@ def parse_args(input_args=None): parser.add_argument( "--output_dir", type=str, - default="controlnet-lora", + default="control-lora", help="The output directory where the model predictions and checkpoints will be written.", ) parser.add_argument( @@ -466,7 +465,7 @@ def parse_args(input_args=None): "--conditioning_image_column", type=str, default="conditioning_image", - help="The column of the dataset containing the controlnet conditioning image.", + help="The column of the dataset containing the control conditioning image.", ) parser.add_argument( "--caption_column", @@ -474,6 +473,7 @@ def parse_args(input_args=None): default="text", help="The column of the dataset containing a caption or a list of captions.", ) + parser.add_argument("--log_dataset_samples", action="store_true", help="Whether to log somple dataset samples.") parser.add_argument( "--max_train_samples", type=int, @@ -500,7 +500,7 @@ def parse_args(input_args=None): default=None, nargs="+", help=( - "A set of paths to the controlnet conditioning image be evaluated every `--validation_steps`" + "A set of paths to the control conditioning image be evaluated every `--validation_steps`" " and logged to `--report_to`. Provide either a matching number of `--validation_prompt`s, a" " a single `--validation_prompt` to be used with all `--validation_image`s, or a single" " `--validation_image` that will be used with all `--validation_prompt`s." @@ -613,7 +613,7 @@ def parse_args(input_args=None): if args.resolution % 8 != 0: raise ValueError( - "`--resolution` must be divisible by 8 for consistently sized encoded images between the VAE and the controlnet encoder." + "`--resolution` must be divisible by 8 for consistently sized encoded images between the VAE and the Flux transformer." ) return args @@ -697,7 +697,12 @@ def preprocess_train(examples): conditioning_images = [image_transforms(image) for image in conditioning_images] examples["pixel_values"] = images examples["conditioning_pixel_values"] = conditioning_images - examples["captions"] = list(examples[args.caption_column]) + + is_caption_list = isinstance(examples[args.caption_column][0], list) + if is_caption_list: + examples["captions"] = [max(example, key=len) for example in examples[args.caption_column]] + else: + examples["captions"] = list(examples[args.caption_column]) return examples @@ -1132,6 +1137,32 @@ def load_model_hook(models, input_dir): else: initial_global_step = 0 + if accelerator.is_main_process and args.report_to == "wandb" and args.log_dataset_samples: + logger.info("Logging some dataset samples.") + formatted_images = [] + formatted_control_images = [] + all_prompts = [] + for i, batch in enumerate(train_dataloader): + images = (batch["pixel_values"] + 1) / 2 + control_images = (batch["conditioning_pixel_values"] + 1) / 2 + prompts = batch["captions"] + + if len(formatted_images) > 10: + break + + for img, control_img, prompt in zip(images, control_images, prompts): + formatted_images.append(img) + formatted_control_images.append(control_img) + all_prompts.append(prompt) + + logged_artifacts = [] + for img, control_img, prompt in zip(formatted_images, formatted_control_images, all_prompts): + logged_artifacts.append(wandb.Image(control_img, caption="Conditioning")) + logged_artifacts.append(wandb.Image(img, caption=prompt)) + + wandb_tracker = [tracker for tracker in accelerator.trackers if tracker.name == "wandb"] + wandb_tracker[0].log({"dataset_samples": logged_artifacts}) + progress_bar = tqdm( range(0, args.max_train_steps), initial=initial_global_step, diff --git a/examples/research_projects/instructpix2pix_lora/README.md b/examples/research_projects/instructpix2pix_lora/README.md index cfcd98926c07..25f7931b47d4 100644 --- a/examples/research_projects/instructpix2pix_lora/README.md +++ b/examples/research_projects/instructpix2pix_lora/README.md @@ -2,6 +2,34 @@ This extended LoRA training script was authored by [Aiden-Frost](https://github.com/Aiden-Frost). This is an experimental LoRA extension of [this example](https://github.com/huggingface/diffusers/blob/main/examples/instruct_pix2pix/train_instruct_pix2pix.py). This script provides further support add LoRA layers for unet model. +## Running locally with PyTorch +### Installing the dependencies + +Before running the scripts, make sure to install the library's training dependencies: + +**Important** + +To make sure you can successfully run the latest versions of the example scripts, we highly recommend **installing from source** and keeping the install up to date as we update the example scripts frequently and install some example-specific requirements. To do this, execute the following steps in a new virtual environment: +```bash +git clone https://github.com/huggingface/diffusers +cd diffusers +pip install . +``` + +Then cd in the example folder and run +```bash +pip install -r requirements.txt +``` + +And initialize an [🤗Accelerate](https://github.com/huggingface/accelerate/) environment with: + +```bash +accelerate config +``` + +Note also that we use PEFT library as backend for LoRA training, make sure to have `peft>=0.6.0` installed in your environment. + + ## Training script example ```bash @@ -9,7 +37,7 @@ export MODEL_ID="timbrooks/instruct-pix2pix" export DATASET_ID="instruction-tuning-sd/cartoonization" export OUTPUT_DIR="instructPix2Pix-cartoonization" -accelerate launch finetune_instruct_pix2pix.py \ +accelerate launch train_instruct_pix2pix_lora.py \ --pretrained_model_name_or_path=$MODEL_ID \ --dataset_name=$DATASET_ID \ --enable_xformers_memory_efficient_attention \ @@ -24,7 +52,10 @@ accelerate launch finetune_instruct_pix2pix.py \ --rank=4 \ --output_dir=$OUTPUT_DIR \ --report_to=wandb \ - --push_to_hub + --push_to_hub \ + --original_image_column="original_image" \ + --edited_image_column="cartoonized_image" \ + --edit_prompt_column="edit_prompt" ``` ## Inference diff --git a/examples/research_projects/instructpix2pix_lora/train_instruct_pix2pix_lora.py b/examples/research_projects/instructpix2pix_lora/train_instruct_pix2pix_lora.py index 997d448fa281..fcb927c680a0 100644 --- a/examples/research_projects/instructpix2pix_lora/train_instruct_pix2pix_lora.py +++ b/examples/research_projects/instructpix2pix_lora/train_instruct_pix2pix_lora.py @@ -14,7 +14,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Script to fine-tune Stable Diffusion for InstructPix2Pix.""" +""" + Script to fine-tune Stable Diffusion for LORA InstructPix2Pix. + Base code referred from: https://github.com/huggingface/diffusers/blob/main/examples/instruct_pix2pix/train_instruct_pix2pix.py +""" import argparse import logging @@ -30,6 +33,7 @@ import PIL import requests import torch +import torch.nn as nn import torch.nn.functional as F import torch.utils.checkpoint import transformers @@ -39,21 +43,28 @@ from datasets import load_dataset from huggingface_hub import create_repo, upload_folder from packaging import version +from peft import LoraConfig +from peft.utils import get_peft_model_state_dict from torchvision import transforms from tqdm.auto import tqdm from transformers import CLIPTextModel, CLIPTokenizer import diffusers from diffusers import AutoencoderKL, DDPMScheduler, StableDiffusionInstructPix2PixPipeline, UNet2DConditionModel -from diffusers.models.lora import LoRALinearLayer from diffusers.optimization import get_scheduler -from diffusers.training_utils import EMAModel -from diffusers.utils import check_min_version, deprecate, is_wandb_available +from diffusers.training_utils import EMAModel, cast_training_params +from diffusers.utils import check_min_version, convert_state_dict_to_diffusers, deprecate, is_wandb_available +from diffusers.utils.hub_utils import load_or_create_model_card, populate_model_card from diffusers.utils.import_utils import is_xformers_available +from diffusers.utils.torch_utils import is_compiled_module + + +if is_wandb_available(): + import wandb # Will error if the minimal version of diffusers is not installed. Remove at your own risks. -check_min_version("0.26.0.dev0") +check_min_version("0.32.0.dev0") logger = get_logger(__name__, log_level="INFO") @@ -63,6 +74,92 @@ WANDB_TABLE_COL_NAMES = ["original_image", "edited_image", "edit_prompt"] +def save_model_card( + repo_id: str, + images: list = None, + base_model: str = None, + dataset_name: str = None, + repo_folder: str = None, +): + img_str = "" + if images is not None: + for i, image in enumerate(images): + image.save(os.path.join(repo_folder, f"image_{i}.png")) + img_str += f"![img_{i}](./image_{i}.png)\n" + + model_description = f""" +# LoRA text2image fine-tuning - {repo_id} +These are LoRA adaption weights for {base_model}. The weights were fine-tuned on the {dataset_name} dataset. You can find some example images in the following. \n +{img_str} +""" + + model_card = load_or_create_model_card( + repo_id_or_path=repo_id, + from_training=True, + license="creativeml-openrail-m", + base_model=base_model, + model_description=model_description, + inference=True, + ) + + tags = [ + "stable-diffusion", + "stable-diffusion-diffusers", + "text-to-image", + "instruct-pix2pix", + "diffusers", + "diffusers-training", + "lora", + ] + model_card = populate_model_card(model_card, tags=tags) + + model_card.save(os.path.join(repo_folder, "README.md")) + + +def log_validation( + pipeline, + args, + accelerator, + generator, +): + logger.info( + f"Running validation... \n Generating {args.num_validation_images} images with prompt:" + f" {args.validation_prompt}." + ) + pipeline = pipeline.to(accelerator.device) + pipeline.set_progress_bar_config(disable=True) + + # run inference + original_image = download_image(args.val_image_url) + edited_images = [] + if torch.backends.mps.is_available(): + autocast_ctx = nullcontext() + else: + autocast_ctx = torch.autocast(accelerator.device.type) + + with autocast_ctx: + for _ in range(args.num_validation_images): + edited_images.append( + pipeline( + args.validation_prompt, + image=original_image, + num_inference_steps=20, + image_guidance_scale=1.5, + guidance_scale=7, + generator=generator, + ).images[0] + ) + + for tracker in accelerator.trackers: + if tracker.name == "wandb": + wandb_table = wandb.Table(columns=WANDB_TABLE_COL_NAMES) + for edited_image in edited_images: + wandb_table.add_data(wandb.Image(original_image), wandb.Image(edited_image), args.validation_prompt) + tracker.log({"validation": wandb_table}) + + return edited_images + + def parse_args(): parser = argparse.ArgumentParser(description="Simple example of a training script for InstructPix2Pix.") parser.add_argument( @@ -417,11 +514,6 @@ def main(): generator = torch.Generator(device=accelerator.device).manual_seed(args.seed) - if args.report_to == "wandb": - if not is_wandb_available(): - raise ImportError("Make sure to install wandb if you want to use it for logging during training.") - import wandb - # Make one log on every process with the configuration for debugging. logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", @@ -467,49 +559,58 @@ def main(): args.pretrained_model_name_or_path, subfolder="unet", revision=args.non_ema_revision ) + # InstructPix2Pix uses an additional image for conditioning. To accommodate that, + # it uses 8 channels (instead of 4) in the first (conv) layer of the UNet. This UNet is + # then fine-tuned on the custom InstructPix2Pix dataset. This modified UNet is initialized + # from the pre-trained checkpoints. For the extra channels added to the first layer, they are + # initialized to zero. + logger.info("Initializing the InstructPix2Pix UNet from the pretrained UNet.") + in_channels = 8 + out_channels = unet.conv_in.out_channels + unet.register_to_config(in_channels=in_channels) + + with torch.no_grad(): + new_conv_in = nn.Conv2d( + in_channels, out_channels, unet.conv_in.kernel_size, unet.conv_in.stride, unet.conv_in.padding + ) + new_conv_in.weight.zero_() + new_conv_in.weight[:, :in_channels, :, :].copy_(unet.conv_in.weight) + unet.conv_in = new_conv_in + # Freeze vae, text_encoder and unet vae.requires_grad_(False) text_encoder.requires_grad_(False) unet.requires_grad_(False) # referred to https://github.com/huggingface/diffusers/blob/main/examples/text_to_image/train_text_to_image_lora.py - unet_lora_parameters = [] - for attn_processor_name, attn_processor in unet.attn_processors.items(): - # Parse the attention module. - attn_module = unet - for n in attn_processor_name.split(".")[:-1]: - attn_module = getattr(attn_module, n) - - # Set the `lora_layer` attribute of the attention-related matrices. - attn_module.to_q.set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_q.in_features, out_features=attn_module.to_q.out_features, rank=args.rank - ) - ) - attn_module.to_k.set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_k.in_features, out_features=attn_module.to_k.out_features, rank=args.rank - ) - ) + # For mixed precision training we cast all non-trainable weights (vae, non-lora text_encoder and non-lora unet) to half-precision + # as these weights are only used for inference, keeping weights in full precision is not required. + weight_dtype = torch.float32 + if accelerator.mixed_precision == "fp16": + weight_dtype = torch.float16 + elif accelerator.mixed_precision == "bf16": + weight_dtype = torch.bfloat16 - attn_module.to_v.set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_v.in_features, out_features=attn_module.to_v.out_features, rank=args.rank - ) - ) - attn_module.to_out[0].set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_out[0].in_features, - out_features=attn_module.to_out[0].out_features, - rank=args.rank, - ) - ) + # Freeze the unet parameters before adding adapters + unet.requires_grad_(False) - # Accumulate the LoRA params to optimize. - unet_lora_parameters.extend(attn_module.to_q.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.to_k.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.to_v.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.to_out[0].lora_layer.parameters()) + unet_lora_config = LoraConfig( + r=args.rank, + lora_alpha=args.rank, + init_lora_weights="gaussian", + target_modules=["to_k", "to_q", "to_v", "to_out.0"], + ) + + # Move unet, vae and text_encoder to device and cast to weight_dtype + unet.to(accelerator.device, dtype=weight_dtype) + vae.to(accelerator.device, dtype=weight_dtype) + text_encoder.to(accelerator.device, dtype=weight_dtype) + + # Add adapter and make sure the trainable params are in float32. + unet.add_adapter(unet_lora_config) + if args.mixed_precision == "fp16": + # only upcast trainable parameters (LoRA) into fp32 + cast_training_params(unet, dtype=torch.float32) # Create EMA for the unet. if args.use_ema: @@ -528,6 +629,13 @@ def main(): else: raise ValueError("xformers is not available. Make sure it is installed correctly") + trainable_params = filter(lambda p: p.requires_grad, unet.parameters()) + + def unwrap_model(model): + model = accelerator.unwrap_model(model) + model = model._orig_mod if is_compiled_module(model) else model + return model + # `accelerate` 0.16.0 will have better support for customized saving if version.parse(accelerate.__version__) >= version.parse("0.16.0"): # create custom saving & loading hooks so that `accelerator.save_state(...)` serializes in a nice format @@ -540,7 +648,8 @@ def save_model_hook(models, weights, output_dir): model.save_pretrained(os.path.join(output_dir, "unet")) # make sure to pop weight so that corresponding model is not saved again - weights.pop() + if weights: + weights.pop() def load_model_hook(models, input_dir): if args.use_ema: @@ -589,9 +698,9 @@ def load_model_hook(models, input_dir): else: optimizer_cls = torch.optim.AdamW - # train on only unet_lora_parameters + # train on only lora_layers optimizer = optimizer_cls( - unet_lora_parameters, + trainable_params, lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, @@ -730,22 +839,27 @@ def collate_fn(examples): ) # Scheduler and math around the number of training steps. - overrode_max_train_steps = False - num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps) + # Check the PR https://github.com/huggingface/diffusers/pull/8312 for detailed explanation. + num_warmup_steps_for_scheduler = args.lr_warmup_steps * accelerator.num_processes if args.max_train_steps is None: - args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch - overrode_max_train_steps = True + len_train_dataloader_after_sharding = math.ceil(len(train_dataloader) / accelerator.num_processes) + num_update_steps_per_epoch = math.ceil(len_train_dataloader_after_sharding / args.gradient_accumulation_steps) + num_training_steps_for_scheduler = ( + args.num_train_epochs * num_update_steps_per_epoch * accelerator.num_processes + ) + else: + num_training_steps_for_scheduler = args.max_train_steps * accelerator.num_processes lr_scheduler = get_scheduler( args.lr_scheduler, optimizer=optimizer, - num_warmup_steps=args.lr_warmup_steps * accelerator.num_processes, - num_training_steps=args.max_train_steps * accelerator.num_processes, + num_warmup_steps=num_warmup_steps_for_scheduler, + num_training_steps=num_training_steps_for_scheduler, ) # Prepare everything with our `accelerator`. - unet, unet_lora_parameters, optimizer, train_dataloader, lr_scheduler = accelerator.prepare( - unet, unet_lora_parameters, optimizer, train_dataloader, lr_scheduler + unet, optimizer, train_dataloader, lr_scheduler = accelerator.prepare( + unet, optimizer, train_dataloader, lr_scheduler ) if args.use_ema: @@ -765,8 +879,14 @@ def collate_fn(examples): # We need to recalculate our total training steps as the size of the training dataloader may have changed. num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps) - if overrode_max_train_steps: + if args.max_train_steps is None: args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch + if num_training_steps_for_scheduler != args.max_train_steps * accelerator.num_processes: + logger.warning( + f"The length of the 'train_dataloader' after 'accelerator.prepare' ({len(train_dataloader)}) does not match " + f"the expected length ({len_train_dataloader_after_sharding}) when the learning rate scheduler was created. " + f"This inconsistency may result in the learning rate scheduler not functioning properly." + ) # Afterwards we recalculate our number of training epochs args.num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch) @@ -885,7 +1005,7 @@ def collate_fn(examples): raise ValueError(f"Unknown prediction type {noise_scheduler.config.prediction_type}") # Predict the noise residual and compute loss - model_pred = unet(concatenated_noisy_latents, timesteps, encoder_hidden_states).sample + model_pred = unet(concatenated_noisy_latents, timesteps, encoder_hidden_states, return_dict=False)[0] loss = F.mse_loss(model_pred.float(), target.float(), reduction="mean") # Gather the losses across all processes for logging (if we use distributed training). @@ -895,7 +1015,7 @@ def collate_fn(examples): # Backpropagate accelerator.backward(loss) if accelerator.sync_gradients: - accelerator.clip_grad_norm_(unet_lora_parameters, args.max_grad_norm) + accelerator.clip_grad_norm_(trainable_params, args.max_grad_norm) optimizer.step() lr_scheduler.step() optimizer.zero_grad() @@ -903,7 +1023,7 @@ def collate_fn(examples): # Checks if the accelerator has performed an optimization step behind the scenes if accelerator.sync_gradients: if args.use_ema: - ema_unet.step(unet_lora_parameters) + ema_unet.step(trainable_params) progress_bar.update(1) global_step += 1 accelerator.log({"train_loss": train_loss}, step=global_step) @@ -933,6 +1053,16 @@ def collate_fn(examples): save_path = os.path.join(args.output_dir, f"checkpoint-{global_step}") accelerator.save_state(save_path) + unwrapped_unet = unwrap_model(unet) + unet_lora_state_dict = convert_state_dict_to_diffusers( + get_peft_model_state_dict(unwrapped_unet) + ) + + StableDiffusionInstructPix2PixPipeline.save_lora_weights( + save_directory=save_path, + unet_lora_layers=unet_lora_state_dict, + safe_serialization=True, + ) logger.info(f"Saved state to {save_path}") logs = {"step_loss": loss.detach().item(), "lr": lr_scheduler.get_last_lr()[0]} @@ -959,45 +1089,22 @@ def collate_fn(examples): # The models need unwrapping because for compatibility in distributed training mode. pipeline = StableDiffusionInstructPix2PixPipeline.from_pretrained( args.pretrained_model_name_or_path, - unet=accelerator.unwrap_model(unet), - text_encoder=accelerator.unwrap_model(text_encoder), - vae=accelerator.unwrap_model(vae), + unet=unwrap_model(unet), + text_encoder=unwrap_model(text_encoder), + vae=unwrap_model(vae), revision=args.revision, variant=args.variant, torch_dtype=weight_dtype, ) - pipeline = pipeline.to(accelerator.device) - pipeline.set_progress_bar_config(disable=True) # run inference - original_image = download_image(args.val_image_url) - edited_images = [] - if torch.backends.mps.is_available(): - autocast_ctx = nullcontext() - else: - autocast_ctx = torch.autocast(accelerator.device.type) - - with autocast_ctx: - for _ in range(args.num_validation_images): - edited_images.append( - pipeline( - args.validation_prompt, - image=original_image, - num_inference_steps=20, - image_guidance_scale=1.5, - guidance_scale=7, - generator=generator, - ).images[0] - ) + log_validation( + pipeline, + args, + accelerator, + generator, + ) - for tracker in accelerator.trackers: - if tracker.name == "wandb": - wandb_table = wandb.Table(columns=WANDB_TABLE_COL_NAMES) - for edited_image in edited_images: - wandb_table.add_data( - wandb.Image(original_image), wandb.Image(edited_image), args.validation_prompt - ) - tracker.log({"validation": wandb_table}) if args.use_ema: # Switch back to the original UNet parameters. ema_unet.restore(unet.parameters()) @@ -1008,22 +1115,47 @@ def collate_fn(examples): # Create the pipeline using the trained modules and save it. accelerator.wait_for_everyone() if accelerator.is_main_process: - unet = accelerator.unwrap_model(unet) if args.use_ema: ema_unet.copy_to(unet.parameters()) + # store only LORA layers + unet = unet.to(torch.float32) + + unwrapped_unet = unwrap_model(unet) + unet_lora_state_dict = convert_state_dict_to_diffusers(get_peft_model_state_dict(unwrapped_unet)) + StableDiffusionInstructPix2PixPipeline.save_lora_weights( + save_directory=args.output_dir, + unet_lora_layers=unet_lora_state_dict, + safe_serialization=True, + ) + pipeline = StableDiffusionInstructPix2PixPipeline.from_pretrained( args.pretrained_model_name_or_path, - text_encoder=accelerator.unwrap_model(text_encoder), - vae=accelerator.unwrap_model(vae), - unet=unet, + text_encoder=unwrap_model(text_encoder), + vae=unwrap_model(vae), + unet=unwrap_model(unet), revision=args.revision, variant=args.variant, ) - # store only LORA layers - unet.save_attn_procs(args.output_dir) + pipeline.load_lora_weights(args.output_dir) + + images = None + if (args.val_image_url is not None) and (args.validation_prompt is not None): + images = log_validation( + pipeline, + args, + accelerator, + generator, + ) if args.push_to_hub: + save_model_card( + repo_id, + images=images, + base_model=args.pretrained_model_name_or_path, + dataset_name=args.dataset_name, + repo_folder=args.output_dir, + ) upload_folder( repo_id=repo_id, folder_path=args.output_dir, @@ -1031,31 +1163,6 @@ def collate_fn(examples): ignore_patterns=["step_*", "epoch_*"], ) - if args.validation_prompt is not None: - edited_images = [] - pipeline = pipeline.to(accelerator.device) - with torch.autocast(str(accelerator.device).replace(":0", "")): - for _ in range(args.num_validation_images): - edited_images.append( - pipeline( - args.validation_prompt, - image=original_image, - num_inference_steps=20, - image_guidance_scale=1.5, - guidance_scale=7, - generator=generator, - ).images[0] - ) - - for tracker in accelerator.trackers: - if tracker.name == "wandb": - wandb_table = wandb.Table(columns=WANDB_TABLE_COL_NAMES) - for edited_image in edited_images: - wandb_table.add_data( - wandb.Image(original_image), wandb.Image(edited_image), args.validation_prompt - ) - tracker.log({"test": wandb_table}) - accelerator.end_training() diff --git a/scripts/convert_blipdiffusion_to_diffusers.py b/scripts/convert_blipdiffusion_to_diffusers.py index 03cf67e5476b..2c286ea0fdc7 100644 --- a/scripts/convert_blipdiffusion_to_diffusers.py +++ b/scripts/convert_blipdiffusion_to_diffusers.py @@ -303,10 +303,11 @@ def save_blip_diffusion_model(model, args): qformer = get_qformer(model) qformer.eval() - text_encoder = ContextCLIPTextModel.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="text_encoder") - vae = AutoencoderKL.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="vae") - - unet = UNet2DConditionModel.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="unet") + text_encoder = ContextCLIPTextModel.from_pretrained( + "stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="text_encoder" + ) + vae = AutoencoderKL.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="vae") + unet = UNet2DConditionModel.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="unet") vae.eval() text_encoder.eval() scheduler = PNDMScheduler( @@ -316,7 +317,7 @@ def save_blip_diffusion_model(model, args): set_alpha_to_one=False, skip_prk_steps=True, ) - tokenizer = CLIPTokenizer.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="tokenizer") + tokenizer = CLIPTokenizer.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="tokenizer") image_processor = BlipImageProcessor() blip_diffusion = BlipDiffusionPipeline( tokenizer=tokenizer, diff --git a/src/diffusers/loaders/lora_conversion_utils.py b/src/diffusers/loaders/lora_conversion_utils.py index 07c2c2272422..e064aeba43b6 100644 --- a/src/diffusers/loaders/lora_conversion_utils.py +++ b/src/diffusers/loaders/lora_conversion_utils.py @@ -973,3 +973,178 @@ def swap_scale_shift(weight): converted_state_dict[f"transformer.{key}"] = converted_state_dict.pop(key) return converted_state_dict + + +def _convert_hunyuan_video_lora_to_diffusers(original_state_dict): + converted_state_dict = {k: original_state_dict.pop(k) for k in list(original_state_dict.keys())} + + def remap_norm_scale_shift_(key, state_dict): + weight = state_dict.pop(key) + shift, scale = weight.chunk(2, dim=0) + new_weight = torch.cat([scale, shift], dim=0) + state_dict[key.replace("final_layer.adaLN_modulation.1", "norm_out.linear")] = new_weight + + def remap_txt_in_(key, state_dict): + def rename_key(key): + new_key = key.replace("individual_token_refiner.blocks", "token_refiner.refiner_blocks") + new_key = new_key.replace("adaLN_modulation.1", "norm_out.linear") + new_key = new_key.replace("txt_in", "context_embedder") + new_key = new_key.replace("t_embedder.mlp.0", "time_text_embed.timestep_embedder.linear_1") + new_key = new_key.replace("t_embedder.mlp.2", "time_text_embed.timestep_embedder.linear_2") + new_key = new_key.replace("c_embedder", "time_text_embed.text_embedder") + new_key = new_key.replace("mlp", "ff") + return new_key + + if "self_attn_qkv" in key: + weight = state_dict.pop(key) + to_q, to_k, to_v = weight.chunk(3, dim=0) + state_dict[rename_key(key.replace("self_attn_qkv", "attn.to_q"))] = to_q + state_dict[rename_key(key.replace("self_attn_qkv", "attn.to_k"))] = to_k + state_dict[rename_key(key.replace("self_attn_qkv", "attn.to_v"))] = to_v + else: + state_dict[rename_key(key)] = state_dict.pop(key) + + def remap_img_attn_qkv_(key, state_dict): + weight = state_dict.pop(key) + if "lora_A" in key: + state_dict[key.replace("img_attn_qkv", "attn.to_q")] = weight + state_dict[key.replace("img_attn_qkv", "attn.to_k")] = weight + state_dict[key.replace("img_attn_qkv", "attn.to_v")] = weight + else: + to_q, to_k, to_v = weight.chunk(3, dim=0) + state_dict[key.replace("img_attn_qkv", "attn.to_q")] = to_q + state_dict[key.replace("img_attn_qkv", "attn.to_k")] = to_k + state_dict[key.replace("img_attn_qkv", "attn.to_v")] = to_v + + def remap_txt_attn_qkv_(key, state_dict): + weight = state_dict.pop(key) + if "lora_A" in key: + state_dict[key.replace("txt_attn_qkv", "attn.add_q_proj")] = weight + state_dict[key.replace("txt_attn_qkv", "attn.add_k_proj")] = weight + state_dict[key.replace("txt_attn_qkv", "attn.add_v_proj")] = weight + else: + to_q, to_k, to_v = weight.chunk(3, dim=0) + state_dict[key.replace("txt_attn_qkv", "attn.add_q_proj")] = to_q + state_dict[key.replace("txt_attn_qkv", "attn.add_k_proj")] = to_k + state_dict[key.replace("txt_attn_qkv", "attn.add_v_proj")] = to_v + + def remap_single_transformer_blocks_(key, state_dict): + hidden_size = 3072 + + if "linear1.lora_A.weight" in key or "linear1.lora_B.weight" in key: + linear1_weight = state_dict.pop(key) + if "lora_A" in key: + new_key = key.replace("single_blocks", "single_transformer_blocks").removesuffix( + ".linear1.lora_A.weight" + ) + state_dict[f"{new_key}.attn.to_q.lora_A.weight"] = linear1_weight + state_dict[f"{new_key}.attn.to_k.lora_A.weight"] = linear1_weight + state_dict[f"{new_key}.attn.to_v.lora_A.weight"] = linear1_weight + state_dict[f"{new_key}.proj_mlp.lora_A.weight"] = linear1_weight + else: + split_size = (hidden_size, hidden_size, hidden_size, linear1_weight.size(0) - 3 * hidden_size) + q, k, v, mlp = torch.split(linear1_weight, split_size, dim=0) + new_key = key.replace("single_blocks", "single_transformer_blocks").removesuffix( + ".linear1.lora_B.weight" + ) + state_dict[f"{new_key}.attn.to_q.lora_B.weight"] = q + state_dict[f"{new_key}.attn.to_k.lora_B.weight"] = k + state_dict[f"{new_key}.attn.to_v.lora_B.weight"] = v + state_dict[f"{new_key}.proj_mlp.lora_B.weight"] = mlp + + elif "linear1.lora_A.bias" in key or "linear1.lora_B.bias" in key: + linear1_bias = state_dict.pop(key) + if "lora_A" in key: + new_key = key.replace("single_blocks", "single_transformer_blocks").removesuffix( + ".linear1.lora_A.bias" + ) + state_dict[f"{new_key}.attn.to_q.lora_A.bias"] = linear1_bias + state_dict[f"{new_key}.attn.to_k.lora_A.bias"] = linear1_bias + state_dict[f"{new_key}.attn.to_v.lora_A.bias"] = linear1_bias + state_dict[f"{new_key}.proj_mlp.lora_A.bias"] = linear1_bias + else: + split_size = (hidden_size, hidden_size, hidden_size, linear1_bias.size(0) - 3 * hidden_size) + q_bias, k_bias, v_bias, mlp_bias = torch.split(linear1_bias, split_size, dim=0) + new_key = key.replace("single_blocks", "single_transformer_blocks").removesuffix( + ".linear1.lora_B.bias" + ) + state_dict[f"{new_key}.attn.to_q.lora_B.bias"] = q_bias + state_dict[f"{new_key}.attn.to_k.lora_B.bias"] = k_bias + state_dict[f"{new_key}.attn.to_v.lora_B.bias"] = v_bias + state_dict[f"{new_key}.proj_mlp.lora_B.bias"] = mlp_bias + + else: + new_key = key.replace("single_blocks", "single_transformer_blocks") + new_key = new_key.replace("linear2", "proj_out") + new_key = new_key.replace("q_norm", "attn.norm_q") + new_key = new_key.replace("k_norm", "attn.norm_k") + state_dict[new_key] = state_dict.pop(key) + + TRANSFORMER_KEYS_RENAME_DICT = { + "img_in": "x_embedder", + "time_in.mlp.0": "time_text_embed.timestep_embedder.linear_1", + "time_in.mlp.2": "time_text_embed.timestep_embedder.linear_2", + "guidance_in.mlp.0": "time_text_embed.guidance_embedder.linear_1", + "guidance_in.mlp.2": "time_text_embed.guidance_embedder.linear_2", + "vector_in.in_layer": "time_text_embed.text_embedder.linear_1", + "vector_in.out_layer": "time_text_embed.text_embedder.linear_2", + "double_blocks": "transformer_blocks", + "img_attn_q_norm": "attn.norm_q", + "img_attn_k_norm": "attn.norm_k", + "img_attn_proj": "attn.to_out.0", + "txt_attn_q_norm": "attn.norm_added_q", + "txt_attn_k_norm": "attn.norm_added_k", + "txt_attn_proj": "attn.to_add_out", + "img_mod.linear": "norm1.linear", + "img_norm1": "norm1.norm", + "img_norm2": "norm2", + "img_mlp": "ff", + "txt_mod.linear": "norm1_context.linear", + "txt_norm1": "norm1.norm", + "txt_norm2": "norm2_context", + "txt_mlp": "ff_context", + "self_attn_proj": "attn.to_out.0", + "modulation.linear": "norm.linear", + "pre_norm": "norm.norm", + "final_layer.norm_final": "norm_out.norm", + "final_layer.linear": "proj_out", + "fc1": "net.0.proj", + "fc2": "net.2", + "input_embedder": "proj_in", + } + + TRANSFORMER_SPECIAL_KEYS_REMAP = { + "txt_in": remap_txt_in_, + "img_attn_qkv": remap_img_attn_qkv_, + "txt_attn_qkv": remap_txt_attn_qkv_, + "single_blocks": remap_single_transformer_blocks_, + "final_layer.adaLN_modulation.1": remap_norm_scale_shift_, + } + + # Some folks attempt to make their state dict compatible with diffusers by adding "transformer." prefix to all keys + # and use their custom code. To make sure both "original" and "attempted diffusers" loras work as expected, we make + # sure that both follow the same initial format by stripping off the "transformer." prefix. + for key in list(converted_state_dict.keys()): + if key.startswith("transformer."): + converted_state_dict[key[len("transformer.") :]] = converted_state_dict.pop(key) + if key.startswith("diffusion_model."): + converted_state_dict[key[len("diffusion_model.") :]] = converted_state_dict.pop(key) + + # Rename and remap the state dict keys + for key in list(converted_state_dict.keys()): + new_key = key[:] + for replace_key, rename_key in TRANSFORMER_KEYS_RENAME_DICT.items(): + new_key = new_key.replace(replace_key, rename_key) + converted_state_dict[new_key] = converted_state_dict.pop(key) + + for key in list(converted_state_dict.keys()): + for special_key, handler_fn_inplace in TRANSFORMER_SPECIAL_KEYS_REMAP.items(): + if special_key not in key: + continue + handler_fn_inplace(key, converted_state_dict) + + # Add back the "transformer." prefix + for key in list(converted_state_dict.keys()): + converted_state_dict[f"transformer.{key}"] = converted_state_dict.pop(key) + + return converted_state_dict diff --git a/src/diffusers/loaders/lora_pipeline.py b/src/diffusers/loaders/lora_pipeline.py index f55d9958e5c3..b5fda3c88635 100644 --- a/src/diffusers/loaders/lora_pipeline.py +++ b/src/diffusers/loaders/lora_pipeline.py @@ -36,6 +36,7 @@ from .lora_base import LORA_WEIGHT_NAME, LORA_WEIGHT_NAME_SAFE, LoraBaseMixin, _fetch_state_dict # noqa from .lora_conversion_utils import ( _convert_bfl_flux_control_lora_to_diffusers, + _convert_hunyuan_video_lora_to_diffusers, _convert_kohya_flux_lora_to_diffusers, _convert_non_diffusers_lora_to_diffusers, _convert_xlabs_flux_lora_to_diffusers, @@ -2277,8 +2278,24 @@ def unfuse_lora(self, components: List[str] = ["transformer", "text_encoder"], * super().unfuse_lora(components=components) - # We override this here account for `_transformer_norm_layers`. - def unload_lora_weights(self): + # We override this here account for `_transformer_norm_layers` and `_overwritten_params`. + def unload_lora_weights(self, reset_to_overwritten_params=False): + """ + Unloads the LoRA parameters. + + Args: + reset_to_overwritten_params (`bool`, defaults to `False`): Whether to reset the LoRA-loaded modules + to their original params. Refer to the [Flux + documentation](https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux) to learn more. + + Examples: + + ```python + >>> # Assuming `pipeline` is already loaded with the LoRA parameters. + >>> pipeline.unload_lora_weights() + >>> ... + ``` + """ super().unload_lora_weights() transformer = getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer @@ -2286,7 +2303,7 @@ def unload_lora_weights(self): transformer.load_state_dict(transformer._transformer_norm_layers, strict=False) transformer._transformer_norm_layers = None - if getattr(transformer, "_overwritten_params", None) is not None: + if reset_to_overwritten_params and getattr(transformer, "_overwritten_params", None) is not None: overwritten_params = transformer._overwritten_params module_names = set() @@ -3991,7 +4008,6 @@ class HunyuanVideoLoraLoaderMixin(LoraBaseMixin): @classmethod @validate_hf_hub_args - # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.lora_state_dict def lora_state_dict( cls, pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]], @@ -4002,7 +4018,7 @@ def lora_state_dict( - We support loading A1111 formatted LoRA checkpoints in a limited capacity. + We support loading original format HunyuanVideo LoRA checkpoints. This function is experimental and might change in the future. @@ -4085,6 +4101,10 @@ def lora_state_dict( logger.warning(warn_msg) state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k} + is_original_hunyuan_video = any("img_attn_qkv" in k for k in state_dict) + if is_original_hunyuan_video: + state_dict = _convert_hunyuan_video_lora_to_diffusers(state_dict) + return state_dict # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.load_lora_weights @@ -4223,10 +4243,9 @@ def save_lora_weights( safe_serialization=safe_serialization, ) - # Copied from diffusers.loaders.lora_pipeline.StableDiffusionLoraLoaderMixin.fuse_lora with unet->transformer def fuse_lora( self, - components: List[str] = ["transformer", "text_encoder"], + components: List[str] = ["transformer"], lora_scale: float = 1.0, safe_fusing: bool = False, adapter_names: Optional[List[str]] = None, @@ -4267,8 +4286,7 @@ def fuse_lora( components=components, lora_scale=lora_scale, safe_fusing=safe_fusing, adapter_names=adapter_names ) - # Copied from diffusers.loaders.lora_pipeline.StableDiffusionLoraLoaderMixin.unfuse_lora with unet->transformer - def unfuse_lora(self, components: List[str] = ["transformer", "text_encoder"], **kwargs): + def unfuse_lora(self, components: List[str] = ["transformer"], **kwargs): r""" Reverses the effect of [`pipe.fuse_lora()`](https://huggingface.co/docs/diffusers/main/en/api/loaders#diffusers.loaders.LoraBaseMixin.fuse_lora). diff --git a/src/diffusers/loaders/single_file.py b/src/diffusers/loaders/single_file.py index c0cbfc713857..c5c9bea29b8a 100644 --- a/src/diffusers/loaders/single_file.py +++ b/src/diffusers/loaders/single_file.py @@ -329,7 +329,7 @@ def from_single_file(cls, pretrained_model_link_or_path, **kwargs): >>> # Enable float16 and move to GPU >>> pipeline = StableDiffusionPipeline.from_single_file( - ... "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt", + ... "https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt", ... torch_dtype=torch.float16, ... ) >>> pipeline.to("cuda") diff --git a/src/diffusers/loaders/single_file_utils.py b/src/diffusers/loaders/single_file_utils.py index b623576e3990..1fa1bdf259cc 100644 --- a/src/diffusers/loaders/single_file_utils.py +++ b/src/diffusers/loaders/single_file_utils.py @@ -109,6 +109,7 @@ "autoencoder-dc-sana": "encoder.project_in.conv.bias", "mochi-1-preview": ["model.diffusion_model.blocks.0.attn.qkv_x.weight", "blocks.0.attn.qkv_x.weight"], "hunyuan-video": "txt_in.individual_token_refiner.blocks.0.adaLN_modulation.1.bias", + "instruct-pix2pix": "model.diffusion_model.input_blocks.0.0.weight", } DIFFUSERS_DEFAULT_PIPELINE_PATHS = { @@ -165,6 +166,7 @@ "autoencoder-dc-f32c32-sana": {"pretrained_model_name_or_path": "mit-han-lab/dc-ae-f32c32-sana-1.0-diffusers"}, "mochi-1-preview": {"pretrained_model_name_or_path": "genmo/mochi-1-preview"}, "hunyuan-video": {"pretrained_model_name_or_path": "hunyuanvideo-community/HunyuanVideo"}, + "instruct-pix2pix": {"pretrained_model_name_or_path": "timbrooks/instruct-pix2pix"}, } # Use to configure model sample size when original config is provided @@ -633,6 +635,12 @@ def infer_diffusers_model_type(checkpoint): elif CHECKPOINT_KEY_NAMES["hunyuan-video"] in checkpoint: model_type = "hunyuan-video" + elif ( + CHECKPOINT_KEY_NAMES["instruct-pix2pix"] in checkpoint + and checkpoint[CHECKPOINT_KEY_NAMES["instruct-pix2pix"]].shape[1] == 8 + ): + model_type = "instruct-pix2pix" + else: model_type = "v1" diff --git a/src/diffusers/loaders/textual_inversion.py b/src/diffusers/loaders/textual_inversion.py index 0162d67a340c..095d154cb4fe 100644 --- a/src/diffusers/loaders/textual_inversion.py +++ b/src/diffusers/loaders/textual_inversion.py @@ -333,7 +333,7 @@ def load_textual_inversion( from diffusers import StableDiffusionPipeline import torch - model_id = "runwayml/stable-diffusion-v1-5" + model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5" pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda") pipe.load_textual_inversion("sd-concepts-library/cat-toy") @@ -352,7 +352,7 @@ def load_textual_inversion( from diffusers import StableDiffusionPipeline import torch - model_id = "runwayml/stable-diffusion-v1-5" + model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5" pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda") pipe.load_textual_inversion("./charturnerv2.pt", token="charturnerv2") @@ -469,7 +469,7 @@ def unload_textual_inversion( from diffusers import AutoPipelineForText2Image import torch - pipeline = AutoPipelineForText2Image.from_pretrained("runwayml/stable-diffusion-v1-5") + pipeline = AutoPipelineForText2Image.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5") # Example 1 pipeline.load_textual_inversion("sd-concepts-library/gta5-artwork") diff --git a/src/diffusers/models/autoencoders/consistency_decoder_vae.py b/src/diffusers/models/autoencoders/consistency_decoder_vae.py index a97249f79473..4759b9141242 100644 --- a/src/diffusers/models/autoencoders/consistency_decoder_vae.py +++ b/src/diffusers/models/autoencoders/consistency_decoder_vae.py @@ -60,7 +60,7 @@ class ConsistencyDecoderVAE(ModelMixin, ConfigMixin): >>> vae = ConsistencyDecoderVAE.from_pretrained("openai/consistency-decoder", torch_dtype=torch.float16) >>> pipe = StableDiffusionPipeline.from_pretrained( - ... "runwayml/stable-diffusion-v1-5", vae=vae, torch_dtype=torch.float16 + ... "stable-diffusion-v1-5/stable-diffusion-v1-5", vae=vae, torch_dtype=torch.float16 ... ).to("cuda") >>> image = pipe("horse", generator=torch.manual_seed(0)).images[0] diff --git a/src/diffusers/models/transformers/transformer_hunyuan_video.py b/src/diffusers/models/transformers/transformer_hunyuan_video.py index e3f24d97f3fa..6cb97af93652 100644 --- a/src/diffusers/models/transformers/transformer_hunyuan_video.py +++ b/src/diffusers/models/transformers/transformer_hunyuan_video.py @@ -721,6 +721,7 @@ def forward( for i in range(batch_size): attention_mask[i, : effective_sequence_length[i], : effective_sequence_length[i]] = True + attention_mask = attention_mask.unsqueeze(1) # [B, 1, N, N], for broadcasting across attention heads # 4. Transformer blocks if torch.is_grad_enabled() and self.gradient_checkpointing: diff --git a/src/diffusers/pipelines/auto_pipeline.py b/src/diffusers/pipelines/auto_pipeline.py index a3e2fc6de78f..8bbf1ebe9fa5 100644 --- a/src/diffusers/pipelines/auto_pipeline.py +++ b/src/diffusers/pipelines/auto_pipeline.py @@ -293,7 +293,7 @@ def from_pretrained(cls, pretrained_model_or_path, **kwargs): If you get the error message below, you need to finetune the weights for your downstream task: ``` - Some weights of UNet2DConditionModel were not initialized from the model checkpoint at runwayml/stable-diffusion-v1-5 and are newly initialized because the shapes did not match: + Some weights of UNet2DConditionModel were not initialized from the model checkpoint at stable-diffusion-v1-5/stable-diffusion-v1-5 and are newly initialized because the shapes did not match: - conv_in.weight: found shape torch.Size([320, 4, 3, 3]) in the checkpoint and torch.Size([320, 9, 3, 3]) in the model instantiated You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. ``` @@ -385,7 +385,7 @@ def from_pretrained(cls, pretrained_model_or_path, **kwargs): ```py >>> from diffusers import AutoPipelineForText2Image - >>> pipeline = AutoPipelineForText2Image.from_pretrained("runwayml/stable-diffusion-v1-5") + >>> pipeline = AutoPipelineForText2Image.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5") >>> image = pipeline(prompt).images[0] ``` """ @@ -448,7 +448,7 @@ def from_pipe(cls, pipeline, **kwargs): >>> from diffusers import AutoPipelineForText2Image, AutoPipelineForImage2Image >>> pipe_i2i = AutoPipelineForImage2Image.from_pretrained( - ... "runwayml/stable-diffusion-v1-5", requires_safety_checker=False + ... "stable-diffusion-v1-5/stable-diffusion-v1-5", requires_safety_checker=False ... ) >>> pipe_t2i = AutoPipelineForText2Image.from_pipe(pipe_i2i) @@ -589,7 +589,7 @@ def from_pretrained(cls, pretrained_model_or_path, **kwargs): If you get the error message below, you need to finetune the weights for your downstream task: ``` - Some weights of UNet2DConditionModel were not initialized from the model checkpoint at runwayml/stable-diffusion-v1-5 and are newly initialized because the shapes did not match: + Some weights of UNet2DConditionModel were not initialized from the model checkpoint at stable-diffusion-v1-5/stable-diffusion-v1-5 and are newly initialized because the shapes did not match: - conv_in.weight: found shape torch.Size([320, 4, 3, 3]) in the checkpoint and torch.Size([320, 9, 3, 3]) in the model instantiated You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. ``` @@ -681,7 +681,7 @@ def from_pretrained(cls, pretrained_model_or_path, **kwargs): ```py >>> from diffusers import AutoPipelineForImage2Image - >>> pipeline = AutoPipelineForImage2Image.from_pretrained("runwayml/stable-diffusion-v1-5") + >>> pipeline = AutoPipelineForImage2Image.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5") >>> image = pipeline(prompt, image).images[0] ``` """ @@ -756,7 +756,7 @@ def from_pipe(cls, pipeline, **kwargs): >>> from diffusers import AutoPipelineForText2Image, AutoPipelineForImage2Image >>> pipe_t2i = AutoPipelineForText2Image.from_pretrained( - ... "runwayml/stable-diffusion-v1-5", requires_safety_checker=False + ... "stable-diffusion-v1-5/stable-diffusion-v1-5", requires_safety_checker=False ... ) >>> pipe_i2i = AutoPipelineForImage2Image.from_pipe(pipe_t2i) @@ -900,7 +900,7 @@ def from_pretrained(cls, pretrained_model_or_path, **kwargs): If you get the error message below, you need to finetune the weights for your downstream task: ``` - Some weights of UNet2DConditionModel were not initialized from the model checkpoint at runwayml/stable-diffusion-v1-5 and are newly initialized because the shapes did not match: + Some weights of UNet2DConditionModel were not initialized from the model checkpoint at stable-diffusion-v1-5/stable-diffusion-v1-5 and are newly initialized because the shapes did not match: - conv_in.weight: found shape torch.Size([320, 4, 3, 3]) in the checkpoint and torch.Size([320, 9, 3, 3]) in the model instantiated You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. ``` @@ -992,7 +992,7 @@ def from_pretrained(cls, pretrained_model_or_path, **kwargs): ```py >>> from diffusers import AutoPipelineForInpainting - >>> pipeline = AutoPipelineForInpainting.from_pretrained("runwayml/stable-diffusion-v1-5") + >>> pipeline = AutoPipelineForInpainting.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5") >>> image = pipeline(prompt, image=init_image, mask_image=mask_image).images[0] ``` """ diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py index 582f51ab480e..1ae4c8d492e5 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py @@ -80,7 +80,7 @@ >>> # load control net and stable diffusion v1-5 >>> controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16) >>> pipe = StableDiffusionControlNetPipeline.from_pretrained( - ... "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16 + ... "stable-diffusion-v1-5/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16 ... ) >>> # speed up diffusion process with faster scheduler and memory optimization @@ -198,8 +198,8 @@ class StableDiffusionControlNetPipeline( [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. - Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details - about a model's potential harms. + Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for + more details about a model's potential harms. feature_extractor ([`~transformers.CLIPImageProcessor`]): A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`. """ diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py index 59ac30d70d77..fbc9844e29a7 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py @@ -71,7 +71,7 @@ >>> # load control net and stable diffusion v1-5 >>> controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16) >>> pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained( - ... "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16 + ... "stable-diffusion-v1-5/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16 ... ) >>> # speed up diffusion process with faster scheduler and memory optimization @@ -168,8 +168,8 @@ class StableDiffusionControlNetImg2ImgPipeline( [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. - Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details - about a model's potential harms. + Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for + more details about a model's potential harms. feature_extractor ([`~transformers.CLIPImageProcessor`]): A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`. """ diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py index 977b852a89c9..1f3ac038581e 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py @@ -83,7 +83,7 @@ ... "lllyasviel/control_v11p_sd15_inpaint", torch_dtype=torch.float16 ... ) >>> pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained( - ... "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16 + ... "stable-diffusion-v1-5/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16 ... ) >>> pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config) @@ -141,11 +141,11 @@ class StableDiffusionControlNetInpaintPipeline( This pipeline can be used with checkpoints that have been specifically fine-tuned for inpainting - ([runwayml/stable-diffusion-inpainting](https://huggingface.co/runwayml/stable-diffusion-inpainting)) as well as - default text-to-image Stable Diffusion checkpoints - ([runwayml/stable-diffusion-v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5)). Default text-to-image - Stable Diffusion checkpoints might be preferable for ControlNets that have been fine-tuned on those, such as - [lllyasviel/control_v11p_sd15_inpaint](https://huggingface.co/lllyasviel/control_v11p_sd15_inpaint). + ([stable-diffusion-v1-5/stable-diffusion-inpainting](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-inpainting)) + as well as default text-to-image Stable Diffusion checkpoints + ([stable-diffusion-v1-5/stable-diffusion-v1-5](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5)). + Default text-to-image Stable Diffusion checkpoints might be preferable for ControlNets that have been fine-tuned on + those, such as [lllyasviel/control_v11p_sd15_inpaint](https://huggingface.co/lllyasviel/control_v11p_sd15_inpaint). @@ -167,8 +167,8 @@ class StableDiffusionControlNetInpaintPipeline( [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. - Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details - about a model's potential harms. + Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for + more details about a model's potential harms. feature_extractor ([`~transformers.CLIPImageProcessor`]): A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`. """ diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py index c6c4ce935a1f..4ec78c5b990f 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py @@ -1622,7 +1622,7 @@ def denoising_value_valid(dnv): # 8. Check that sizes of mask, masked image and latents match if num_channels_unet == 9: - # default case for runwayml/stable-diffusion-inpainting + # default case for stable-diffusion-v1-5/stable-diffusion-inpainting num_channels_mask = mask.shape[1] num_channels_masked_image = masked_image_latents.shape[1] if num_channels_latents + num_channels_mask + num_channels_masked_image != self.unet.config.in_channels: diff --git a/src/diffusers/pipelines/controlnet/pipeline_flax_controlnet.py b/src/diffusers/pipelines/controlnet/pipeline_flax_controlnet.py index 8a2cc08dbb2b..075df628d4f1 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +++ b/src/diffusers/pipelines/controlnet/pipeline_flax_controlnet.py @@ -75,7 +75,10 @@ ... "lllyasviel/sd-controlnet-canny", from_pt=True, dtype=jnp.float32 ... ) >>> pipe, params = FlaxStableDiffusionControlNetPipeline.from_pretrained( - ... "runwayml/stable-diffusion-v1-5", controlnet=controlnet, revision="flax", dtype=jnp.float32 + ... "stable-diffusion-v1-5/stable-diffusion-v1-5", + ... controlnet=controlnet, + ... revision="flax", + ... dtype=jnp.float32, ... ) >>> params["controlnet"] = controlnet_params @@ -132,8 +135,8 @@ class FlaxStableDiffusionControlNetPipeline(FlaxDiffusionPipeline): [`FlaxDPMSolverMultistepScheduler`]. safety_checker ([`FlaxStableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. - Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details - about a model's potential harms. + Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for + more details about a model's potential harms. feature_extractor ([`~transformers.CLIPImageProcessor`]): A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`. """ diff --git a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py index a1930da4180e..cfd251a72b35 100644 --- a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +++ b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py @@ -210,7 +210,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -224,7 +224,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" diff --git a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py index e40b6efd71ab..612e5d57dff2 100644 --- a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +++ b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py @@ -238,7 +238,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -252,7 +252,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" diff --git a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py index 777be883cb9d..340abcf69c5e 100644 --- a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +++ b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py @@ -184,7 +184,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " diff --git a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py index 0aa5e68bfcb4..e9553a8d99b0 100644 --- a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +++ b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py @@ -93,7 +93,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -107,7 +107,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" diff --git a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py index ce7ad3b0dfe9..5b77920a0c75 100644 --- a/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +++ b/src/diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py @@ -140,7 +140,7 @@ def __init__( ) deprecate("legacy is outdated", "1.0.0", deprecation_message, standard_warn=False) - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -154,7 +154,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" diff --git a/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py b/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py index f0f71080d0a3..ab68ffe33646 100644 --- a/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +++ b/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py @@ -34,21 +34,19 @@ EXAMPLE_DOC_STRING = """ Examples: ```py - >>> import PIL - >>> import requests >>> import torch - >>> from io import BytesIO >>> from diffusers import LEditsPPPipelineStableDiffusion >>> from diffusers.utils import load_image >>> pipe = LEditsPPPipelineStableDiffusion.from_pretrained( - ... "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16 + ... "runwayml/stable-diffusion-v1-5", variant="fp16", torch_dtype=torch.float16 ... ) + >>> pipe.enable_vae_tiling() >>> pipe = pipe.to("cuda") >>> img_url = "https://www.aiml.informatik.tu-darmstadt.de/people/mbrack/cherry_blossom.png" - >>> image = load_image(img_url).convert("RGB") + >>> image = load_image(img_url).resize((512, 512)) >>> _ = pipe.invert(image=image, num_inversion_steps=50, skip=0.1) @@ -152,7 +150,7 @@ def __init__(self, device): # The gaussian kernel is the product of the gaussian function of each dimension. kernel = 1 - meshgrids = torch.meshgrid([torch.arange(size, dtype=torch.float32) for size in kernel_size]) + meshgrids = torch.meshgrid([torch.arange(size, dtype=torch.float32) for size in kernel_size], indexing="ij") for size, std, mgrid in zip(kernel_size, sigma, meshgrids): mean = (size - 1) / 2 kernel *= 1 / (std * math.sqrt(2 * math.pi)) * torch.exp(-(((mgrid - mean) / (2 * std)) ** 2)) @@ -318,7 +316,7 @@ def __init__( "The scheduler has been changed to DPMSolverMultistepScheduler." ) - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -332,7 +330,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" @@ -706,6 +704,35 @@ def clip_skip(self): def cross_attention_kwargs(self): return self._cross_attention_kwargs + def enable_vae_slicing(self): + r""" + Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to + compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. + """ + self.vae.enable_slicing() + + def disable_vae_slicing(self): + r""" + Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to + computing decoding in one step. + """ + self.vae.disable_slicing() + + def enable_vae_tiling(self): + r""" + Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to + compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow + processing larger images. + """ + self.vae.enable_tiling() + + def disable_vae_tiling(self): + r""" + Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to + computing decoding in one step. + """ + self.vae.disable_tiling() + @torch.no_grad() @replace_example_docstring(EXAMPLE_DOC_STRING) def __call__( @@ -1271,6 +1298,8 @@ def invert( [`~pipelines.ledits_pp.LEditsPPInversionPipelineOutput`]: Output will contain the resized input image(s) and respective VAE reconstruction(s). """ + if height is not None and height % 32 != 0 or width is not None and width % 32 != 0: + raise ValueError("height and width must be a factor of 32.") # Reset attn processor, we do not want to store attn maps during inversion self.unet.set_attn_processor(AttnProcessor()) @@ -1360,6 +1389,12 @@ def encode_image(self, image, dtype=None, height=None, width=None, resize_mode=" image = self.image_processor.preprocess( image=image, height=height, width=width, resize_mode=resize_mode, crops_coords=crops_coords ) + height, width = image.shape[-2:] + if height % 32 != 0 or width % 32 != 0: + raise ValueError( + "Image height and width must be a factor of 32. " + "Consider down-sampling the input using the `height` and `width` parameters" + ) resized = self.image_processor.postprocess(image=image, output_type="pil") if max(image.shape[-2:]) > self.vae.config["sample_size"] * 1.5: diff --git a/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py b/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py index 834445bfcd06..137e0c742c09 100644 --- a/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +++ b/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py @@ -72,25 +72,18 @@ Examples: ```py >>> import torch - >>> import PIL - >>> import requests - >>> from io import BytesIO >>> from diffusers import LEditsPPPipelineStableDiffusionXL + >>> from diffusers.utils import load_image >>> pipe = LEditsPPPipelineStableDiffusionXL.from_pretrained( - ... "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16 + ... "stabilityai/stable-diffusion-xl-base-1.0", variant="fp16", torch_dtype=torch.float16 ... ) + >>> pipe.enable_vae_tiling() >>> pipe = pipe.to("cuda") - - >>> def download_image(url): - ... response = requests.get(url) - ... return PIL.Image.open(BytesIO(response.content)).convert("RGB") - - >>> img_url = "https://www.aiml.informatik.tu-darmstadt.de/people/mbrack/tennis.jpg" - >>> image = download_image(img_url) + >>> image = load_image(img_url).resize((1024, 1024)) >>> _ = pipe.invert(image=image, num_inversion_steps=50, skip=0.2) @@ -197,7 +190,7 @@ def __init__(self, device): # The gaussian kernel is the product of the gaussian function of each dimension. kernel = 1 - meshgrids = torch.meshgrid([torch.arange(size, dtype=torch.float32) for size in kernel_size]) + meshgrids = torch.meshgrid([torch.arange(size, dtype=torch.float32) for size in kernel_size], indexing="ij") for size, std, mgrid in zip(kernel_size, sigma, meshgrids): mean = (size - 1) / 2 kernel *= 1 / (std * math.sqrt(2 * math.pi)) * torch.exp(-(((mgrid - mean) / (2 * std)) ** 2)) @@ -768,6 +761,35 @@ def denoising_end(self): def num_timesteps(self): return self._num_timesteps + def enable_vae_slicing(self): + r""" + Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to + compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. + """ + self.vae.enable_slicing() + + def disable_vae_slicing(self): + r""" + Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to + computing decoding in one step. + """ + self.vae.disable_slicing() + + def enable_vae_tiling(self): + r""" + Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to + compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow + processing larger images. + """ + self.vae.enable_tiling() + + def disable_vae_tiling(self): + r""" + Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to + computing decoding in one step. + """ + self.vae.disable_tiling() + # Copied from diffusers.pipelines.ledits_pp.pipeline_leditspp_stable_diffusion.LEditsPPPipelineStableDiffusion.prepare_unet def prepare_unet(self, attention_store, PnP: bool = False): attn_procs = {} @@ -1401,6 +1423,12 @@ def encode_image(self, image, dtype=None, height=None, width=None, resize_mode=" image = self.image_processor.preprocess( image=image, height=height, width=width, resize_mode=resize_mode, crops_coords=crops_coords ) + height, width = image.shape[-2:] + if height % 32 != 0 or width % 32 != 0: + raise ValueError( + "Image height and width must be a factor of 32. " + "Consider down-sampling the input using the `height` and `width` parameters" + ) resized = self.image_processor.postprocess(image=image, output_type="pil") if max(image.shape[-2:]) > self.vae.config["sample_size"] * 1.5: @@ -1439,6 +1467,10 @@ def invert( crops_coords_top_left: Tuple[int, int] = (0, 0), num_zero_noise_steps: int = 3, cross_attention_kwargs: Optional[Dict[str, Any]] = None, + height: Optional[int] = None, + width: Optional[int] = None, + resize_mode: Optional[str] = "default", + crops_coords: Optional[Tuple[int, int, int, int]] = None, ): r""" The function to the pipeline for image inversion as described by the [LEDITS++ @@ -1486,6 +1518,8 @@ def invert( [`~pipelines.ledits_pp.LEditsPPInversionPipelineOutput`]: Output will contain the resized input image(s) and respective VAE reconstruction(s). """ + if height is not None and height % 32 != 0 or width is not None and width % 32 != 0: + raise ValueError("height and width must be a factor of 32.") # Reset attn processor, we do not want to store attn maps during inversion self.unet.set_attn_processor(AttnProcessor()) @@ -1510,7 +1544,14 @@ def invert( do_classifier_free_guidance = source_guidance_scale > 1.0 # 1. prepare image - x0, resized = self.encode_image(image, dtype=self.text_encoder_2.dtype) + x0, resized = self.encode_image( + image, + dtype=self.text_encoder_2.dtype, + height=height, + width=width, + resize_mode=resize_mode, + crops_coords=crops_coords, + ) width = x0.shape[2] * self.vae_scale_factor height = x0.shape[3] * self.vae_scale_factor self.size = (height, width) diff --git a/src/diffusers/pipelines/pag/pipeline_pag_sd.py b/src/diffusers/pipelines/pag/pipeline_pag_sd.py index 6220a00f2c22..2e2d9afb9096 100644 --- a/src/diffusers/pipelines/pag/pipeline_pag_sd.py +++ b/src/diffusers/pipelines/pag/pipeline_pag_sd.py @@ -207,7 +207,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -221,7 +221,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" diff --git a/src/diffusers/pipelines/pag/pipeline_pag_sd_img2img.py b/src/diffusers/pipelines/pag/pipeline_pag_sd_img2img.py index b7a695be17e5..81db8caf16f0 100644 --- a/src/diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +++ b/src/diffusers/pipelines/pag/pipeline_pag_sd_img2img.py @@ -202,7 +202,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -216,7 +216,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" diff --git a/src/diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py b/src/diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py index ff6ba8a6a853..800f512c061c 100644 --- a/src/diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +++ b/src/diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py @@ -234,7 +234,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -248,7 +248,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" diff --git a/src/diffusers/pipelines/pipeline_flax_utils.py b/src/diffusers/pipelines/pipeline_flax_utils.py index f7b101124181..5486bc35f035 100644 --- a/src/diffusers/pipelines/pipeline_flax_utils.py +++ b/src/diffusers/pipelines/pipeline_flax_utils.py @@ -237,15 +237,15 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P If you get the error message below, you need to finetune the weights for your downstream task: ``` - Some weights of FlaxUNet2DConditionModel were not initialized from the model checkpoint at runwayml/stable-diffusion-v1-5 and are newly initialized because the shapes did not match: + Some weights of FlaxUNet2DConditionModel were not initialized from the model checkpoint at stable-diffusion-v1-5/stable-diffusion-v1-5 and are newly initialized because the shapes did not match: ``` Parameters: pretrained_model_name_or_path (`str` or `os.PathLike`, *optional*): Can be either: - - A string, the *repo id* (for example `runwayml/stable-diffusion-v1-5`) of a pretrained pipeline - hosted on the Hub. + - A string, the *repo id* (for example `stable-diffusion-v1-5/stable-diffusion-v1-5`) of a + pretrained pipeline hosted on the Hub. - A path to a *directory* (for example `./my_model_directory`) containing the model weights saved using [`~FlaxDiffusionPipeline.save_pretrained`]. dtype (`str` or `jnp.dtype`, *optional*): @@ -293,7 +293,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P >>> # Requires to be logged in to Hugging Face hub, >>> # see more in [the documentation](https://huggingface.co/docs/hub/security-tokens) >>> pipeline, params = FlaxDiffusionPipeline.from_pretrained( - ... "runwayml/stable-diffusion-v1-5", + ... "stable-diffusion-v1-5/stable-diffusion-v1-5", ... variant="bf16", ... dtype=jnp.bfloat16, ... ) @@ -301,7 +301,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P >>> # Download pipeline, but use a different scheduler >>> from diffusers import FlaxDPMSolverMultistepScheduler - >>> model_id = "runwayml/stable-diffusion-v1-5" + >>> model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5" >>> dpmpp, dpmpp_state = FlaxDPMSolverMultistepScheduler.from_pretrained( ... model_id, ... subfolder="scheduler", @@ -559,7 +559,7 @@ def components(self) -> Dict[str, Any]: ... ) >>> text2img = FlaxStableDiffusionPipeline.from_pretrained( - ... "runwayml/stable-diffusion-v1-5", variant="bf16", dtype=jnp.bfloat16 + ... "stable-diffusion-v1-5/stable-diffusion-v1-5", variant="bf16", dtype=jnp.bfloat16 ... ) >>> img2img = FlaxStableDiffusionImg2ImgPipeline(**text2img.components) ``` diff --git a/src/diffusers/pipelines/pipeline_loading_utils.py b/src/diffusers/pipelines/pipeline_loading_utils.py index 0a7a222ec007..23f1279e203d 100644 --- a/src/diffusers/pipelines/pipeline_loading_utils.py +++ b/src/diffusers/pipelines/pipeline_loading_utils.py @@ -813,9 +813,9 @@ def _maybe_raise_warning_for_inpainting(pipeline_class, pretrained_model_name_or "You are using a legacy checkpoint for inpainting with Stable Diffusion, therefore we are loading the" f" {StableDiffusionInpaintPipelineLegacy} class instead of {StableDiffusionInpaintPipeline}. For" " better inpainting results, we strongly suggest using Stable Diffusion's official inpainting" - " checkpoint: https://huggingface.co/runwayml/stable-diffusion-inpainting instead or adapting your" + " checkpoint: https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-inpainting instead or adapting your" f" checkpoint {pretrained_model_name_or_path} to the format of" - " https://huggingface.co/runwayml/stable-diffusion-inpainting. Note that we do not actively maintain" + " https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-inpainting. Note that we do not actively maintain" " the {StableDiffusionInpaintPipelineLegacy} class and will likely remove it in version 1.0.0." ) deprecate("StableDiffusionInpaintPipelineLegacy", "1.0.0", deprecation_message, standard_warn=False) diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index c505c5a262a3..be900ca4469b 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -516,7 +516,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P If you get the error message below, you need to finetune the weights for your downstream task: ``` - Some weights of UNet2DConditionModel were not initialized from the model checkpoint at runwayml/stable-diffusion-v1-5 and are newly initialized because the shapes did not match: + Some weights of UNet2DConditionModel were not initialized from the model checkpoint at stable-diffusion-v1-5/stable-diffusion-v1-5 and are newly initialized because the shapes did not match: - conv_in.weight: found shape torch.Size([320, 4, 3, 3]) in the checkpoint and torch.Size([320, 9, 3, 3]) in the model instantiated You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. ``` @@ -643,7 +643,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P >>> # Download pipeline that requires an authorization token >>> # For more information on access tokens, please refer to this section >>> # of the documentation](https://huggingface.co/docs/hub/security-tokens) - >>> pipeline = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5") + >>> pipeline = DiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5") >>> # Use a different scheduler >>> from diffusers import LMSDiscreteScheduler @@ -1555,7 +1555,7 @@ def components(self) -> Dict[str, Any]: ... StableDiffusionInpaintPipeline, ... ) - >>> text2img = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5") + >>> text2img = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5") >>> img2img = StableDiffusionImg2ImgPipeline(**text2img.components) >>> inpaint = StableDiffusionInpaintPipeline(**text2img.components) ``` @@ -1688,7 +1688,7 @@ def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto >>> from diffusers import StableDiffusionPipeline >>> pipe = StableDiffusionPipeline.from_pretrained( - ... "runwayml/stable-diffusion-v1-5", + ... "stable-diffusion-v1-5/stable-diffusion-v1-5", ... torch_dtype=torch.float16, ... use_safetensors=True, ... ) @@ -1735,7 +1735,7 @@ def from_pipe(cls, pipeline, **kwargs): ```py >>> from diffusers import StableDiffusionPipeline, StableDiffusionSAGPipeline - >>> pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5") + >>> pipe = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5") >>> new_pipe = StableDiffusionSAGPipeline.from_pipe(pipe) ``` """ diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py index 5d6ffd463cc3..9ecae6083eb6 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py @@ -55,7 +55,7 @@ >>> from diffusers import FlaxStableDiffusionPipeline >>> pipeline, params = FlaxStableDiffusionPipeline.from_pretrained( - ... "runwayml/stable-diffusion-v1-5", variant="bf16", dtype=jax.numpy.bfloat16 + ... "stable-diffusion-v1-5/stable-diffusion-v1-5", variant="bf16", dtype=jax.numpy.bfloat16 ... ) >>> prompt = "a photo of an astronaut riding a horse on mars" @@ -100,8 +100,8 @@ class FlaxStableDiffusionPipeline(FlaxDiffusionPipeline): [`FlaxDPMSolverMultistepScheduler`]. safety_checker ([`FlaxStableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. - Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details - about a model's potential harms. + Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for + more details about a model's potential harms. feature_extractor ([`~transformers.CLIPImageProcessor`]): A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`. """ @@ -141,8 +141,8 @@ def __init__( "The configuration file of the unet has set the default `sample_size` to smaller than" " 64 which seems highly unlikely .If you're checkpoint is a fine-tuned version of any of the" " following: \n- CompVis/stable-diffusion-v1-4 \n- CompVis/stable-diffusion-v1-3 \n-" - " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- runwayml/stable-diffusion-v1-5" - " \n- runwayml/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the" + " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- stable-diffusion-v1-5/stable-diffusion-v1-5" + " \n- stable-diffusion-v1-5/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the" " configuration file. Please make sure to update the config accordingly as leaving `sample_size=32`" " in the config might lead to incorrect results in future versions. If you have downloaded this" " checkpoint from the Hugging Face Hub, it would be very nice if you could open a Pull request for" diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py index 7792bc097595..ecfb8c16f62c 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py @@ -124,8 +124,8 @@ class FlaxStableDiffusionImg2ImgPipeline(FlaxDiffusionPipeline): [`FlaxDPMSolverMultistepScheduler`]. safety_checker ([`FlaxStableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. - Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details - about a model's potential harms. + Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for + more details about a model's potential harms. feature_extractor ([`~transformers.CLIPImageProcessor`]): A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`. """ diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py index f6bb0ac299b3..338220ae3940 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py @@ -127,8 +127,8 @@ class FlaxStableDiffusionInpaintPipeline(FlaxDiffusionPipeline): [`FlaxDPMSolverMultistepScheduler`]. safety_checker ([`FlaxStableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. - Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details - about a model's potential harms. + Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for + more details about a model's potential harms. feature_extractor ([`~transformers.CLIPImageProcessor`]): A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`. """ @@ -168,8 +168,8 @@ def __init__( "The configuration file of the unet has set the default `sample_size` to smaller than" " 64 which seems highly unlikely .If you're checkpoint is a fine-tuned version of any of the" " following: \n- CompVis/stable-diffusion-v1-4 \n- CompVis/stable-diffusion-v1-3 \n-" - " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- runwayml/stable-diffusion-v1-5" - " \n- runwayml/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the" + " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- stable-diffusion-v1-5/stable-diffusion-v1-5" + " \n- stable-diffusion-v1-5/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the" " configuration file. Please make sure to update the config accordingly as leaving `sample_size=32`" " in the config might lead to incorrect results in future versions. If you have downloaded this" " checkpoint from the Hugging Face Hub, it would be very nice if you could open a Pull request for" diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py index 2e34dcb83c01..9917276e0a1f 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py @@ -57,7 +57,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -71,7 +71,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py index c39409886bd9..92c82d61b8f2 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py @@ -78,7 +78,8 @@ class OnnxStableDiffusionImg2ImgPipeline(DiffusionPipeline): [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. - Please, refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for details. + Please, refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for + details. feature_extractor ([`CLIPImageProcessor`]): Model that extracts features from generated images to be used as inputs for the `safety_checker`. """ @@ -109,7 +110,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -123,7 +124,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py index 18d8050826cc..ddd2e27dedaf 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py @@ -76,7 +76,8 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline): [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. - Please, refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for details. + Please, refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for + details. feature_extractor ([`CLIPImageProcessor`]): Model that extracts features from generated images to be used as inputs for the `safety_checker`. """ @@ -108,7 +109,7 @@ def __init__( super().__init__() logger.info("`OnnxStableDiffusionInpaintPipeline` is experimental and will very likely change in the future.") - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -122,7 +123,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py index cd9ec57fb879..ef84cdd38b6d 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py @@ -83,7 +83,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -97,7 +97,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index ac6c8253e432..959c8135f73b 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -55,7 +55,9 @@ >>> import torch >>> from diffusers import StableDiffusionPipeline - >>> pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16) + >>> pipe = StableDiffusionPipeline.from_pretrained( + ... "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16 + ... ) >>> pipe = pipe.to("cuda") >>> prompt = "a photo of an astronaut riding a horse on mars" @@ -184,8 +186,8 @@ class StableDiffusionPipeline( [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. - Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details - about a model's potential harms. + Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for + more details about a model's potential harms. feature_extractor ([`~transformers.CLIPImageProcessor`]): A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`. """ @@ -209,7 +211,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -223,7 +225,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" @@ -266,8 +268,8 @@ def __init__( "The configuration file of the unet has set the default `sample_size` to smaller than" " 64 which seems highly unlikely. If your checkpoint is a fine-tuned version of any of the" " following: \n- CompVis/stable-diffusion-v1-4 \n- CompVis/stable-diffusion-v1-3 \n-" - " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- runwayml/stable-diffusion-v1-5" - " \n- runwayml/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the" + " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- stable-diffusion-v1-5/stable-diffusion-v1-5" + " \n- stable-diffusion-v1-5/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the" " configuration file. Please make sure to update the config accordingly as leaving `sample_size=32`" " in the config might lead to incorrect results in future versions. If you have downloaded this" " checkpoint from the Hugging Face Hub, it would be very nice if you could open a Pull request for" diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py index 7801b0d01dff..9e758d91cadd 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py @@ -124,8 +124,8 @@ def __init__( "The configuration file of the unet has set the default `sample_size` to smaller than" " 64 which seems highly unlikely .If you're checkpoint is a fine-tuned version of any of the" " following: \n- CompVis/stable-diffusion-v1-4 \n- CompVis/stable-diffusion-v1-3 \n-" - " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- runwayml/stable-diffusion-v1-5" - " \n- runwayml/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the" + " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- stable-diffusion-v1-5/stable-diffusion-v1-5" + " \n- stable-diffusion-v1-5/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the" " configuration file. Please make sure to update the config accordingly as leaving `sample_size=32`" " in the config might lead to incorrect results in future versions. If you have downloaded this" " checkpoint from the Hugging Face Hub, it would be very nice if you could open a Pull request for" diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py index 93a8bd160318..fb80bb34b3ba 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py @@ -57,8 +57,8 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline, StableDiffusionMi [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. - Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details - about a model's potential harms. + Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for + more details about a model's potential harms. feature_extractor ([`~transformers.CLIPImageProcessor`]): A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`. """ @@ -106,8 +106,8 @@ def __init__( "The configuration file of the unet has set the default `sample_size` to smaller than" " 64 which seems highly unlikely .If you're checkpoint is a fine-tuned version of any of the" " following: \n- CompVis/stable-diffusion-v1-4 \n- CompVis/stable-diffusion-v1-3 \n-" - " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- runwayml/stable-diffusion-v1-5" - " \n- runwayml/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the" + " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- stable-diffusion-v1-5/stable-diffusion-v1-5" + " \n- stable-diffusion-v1-5/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the" " configuration file. Please make sure to update the config accordingly as leaving `sample_size=32`" " in the config might lead to incorrect results in future versions. If you have downloaded this" " checkpoint from the Hugging Face Hub, it would be very nice if you could open a Pull request for" diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 9cd5673c9359..a1ae273add62 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -56,7 +56,7 @@ >>> from diffusers import StableDiffusionImg2ImgPipeline >>> device = "cuda" - >>> model_id_or_path = "runwayml/stable-diffusion-v1-5" + >>> model_id_or_path = "stable-diffusion-v1-5/stable-diffusion-v1-5" >>> pipe = StableDiffusionImg2ImgPipeline.from_pretrained(model_id_or_path, torch_dtype=torch.float16) >>> pipe = pipe.to(device) @@ -205,8 +205,8 @@ class StableDiffusionImg2ImgPipeline( [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. - Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details - about a model's potential harms. + Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for + more details about a model's potential harms. feature_extractor ([`~transformers.CLIPImageProcessor`]): A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`. """ @@ -230,7 +230,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -244,7 +244,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" @@ -282,8 +282,8 @@ def __init__( "The configuration file of the unet has set the default `sample_size` to smaller than" " 64 which seems highly unlikely. If your checkpoint is a fine-tuned version of any of the" " following: \n- CompVis/stable-diffusion-v1-4 \n- CompVis/stable-diffusion-v1-3 \n-" - " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- runwayml/stable-diffusion-v1-5" - " \n- runwayml/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the" + " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- stable-diffusion-v1-5/stable-diffusion-v1-5" + " \n- stable-diffusion-v1-5/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the" " configuration file. Please make sure to update the config accordingly as leaving `sample_size=32`" " in the config might lead to incorrect results in future versions. If you have downloaded this" " checkpoint from the Hugging Face Hub, it would be very nice if you could open a Pull request for" diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index 49c38c800942..db4c687f991d 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -146,8 +146,8 @@ class StableDiffusionInpaintPipeline( [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. - Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details - about a model's potential harms. + Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for + more details about a model's potential harms. feature_extractor ([`~transformers.CLIPImageProcessor`]): A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`. """ @@ -171,7 +171,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -185,7 +185,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "skip_prk_steps") and scheduler.config.skip_prk_steps is False: + if scheduler is not None and getattr(scheduler.config, "skip_prk_steps", True) is False: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration" " `skip_prk_steps`. `skip_prk_steps` should be set to True in the configuration file. Please make" @@ -224,8 +224,8 @@ def __init__( "The configuration file of the unet has set the default `sample_size` to smaller than" " 64 which seems highly unlikely .If you're checkpoint is a fine-tuned version of any of the" " following: \n- CompVis/stable-diffusion-v1-4 \n- CompVis/stable-diffusion-v1-3 \n-" - " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- runwayml/stable-diffusion-v1-5" - " \n- runwayml/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the" + " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- stable-diffusion-v1-5/stable-diffusion-v1-5" + " \n- stable-diffusion-v1-5/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the" " configuration file. Please make sure to update the config accordingly as leaving `sample_size=32`" " in the config might lead to incorrect results in future versions. If you have downloaded this" " checkpoint from the Hugging Face Hub, it would be very nice if you could open a Pull request for" @@ -1014,7 +1014,7 @@ def __call__( >>> mask_image = download_image(mask_url).resize((512, 512)) >>> pipe = StableDiffusionInpaintPipeline.from_pretrained( - ... "runwayml/stable-diffusion-inpainting", torch_dtype=torch.float16 + ... "stable-diffusion-v1-5/stable-diffusion-inpainting", torch_dtype=torch.float16 ... ) >>> pipe = pipe.to("cuda") @@ -1200,7 +1200,7 @@ def __call__( # 8. Check that sizes of mask, masked image and latents match if num_channels_unet == 9: - # default case for runwayml/stable-diffusion-inpainting + # default case for stable-diffusion-v1-5/stable-diffusion-inpainting num_channels_mask = mask.shape[1] num_channels_masked_image = masked_image_latents.shape[1] if num_channels_latents + num_channels_mask + num_channels_masked_image != self.unet.config.in_channels: diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py index fd89b195c778..76b4f285b50f 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py @@ -22,16 +22,23 @@ from ...callbacks import MultiPipelineCallbacks, PipelineCallback from ...image_processor import PipelineImageInput, VaeImageProcessor -from ...loaders import IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin +from ...loaders import FromSingleFileMixin, IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel from ...schedulers import KarrasDiffusionSchedulers -from ...utils import PIL_INTERPOLATION, deprecate, logging +from ...utils import PIL_INTERPOLATION, deprecate, is_torch_xla_available, logging from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin from . import StableDiffusionPipelineOutput from .safety_checker import StableDiffusionSafetyChecker +if is_torch_xla_available(): + import torch_xla.core.xla_model as xm + + XLA_AVAILABLE = True +else: + XLA_AVAILABLE = False + logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -79,6 +86,7 @@ class StableDiffusionInstructPix2PixPipeline( TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin, IPAdapterMixin, + FromSingleFileMixin, ): r""" Pipeline for pixel-level image editing by following text instructions (based on Stable Diffusion). @@ -106,8 +114,8 @@ class StableDiffusionInstructPix2PixPipeline( [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. - Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details - about a model's potential harms. + Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for + more details about a model's potential harms. feature_extractor ([`~transformers.CLIPImageProcessor`]): A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`. """ @@ -457,6 +465,9 @@ def __call__( step_idx = i // getattr(self.scheduler, "order", 1) callback(step_idx, t, latents) + if XLA_AVAILABLE: + xm.mark_step() + if not output_type == "latent": image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0] image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype) diff --git a/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py b/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py index 8f40fa72a25c..2147d42a9f38 100644 --- a/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +++ b/src/diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py @@ -194,8 +194,8 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, StableDiffusionM [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. - Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details - about a model's potential harms. + Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for + more details about a model's potential harms. feature_extractor ([`~transformers.CLIPImageProcessor`]): A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`. """ diff --git a/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py b/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py index 2b86470dbff1..978ab165f891 100644 --- a/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +++ b/src/diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py @@ -268,8 +268,8 @@ class StableDiffusionDiffEditPipeline( A scheduler to be used in combination with `unet` to fill in the unmasked part of the input latents. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. - Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details - about a model's potential harms. + Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for + more details about a model's potential harms. feature_extractor ([`~transformers.CLIPImageProcessor`]): A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`. """ @@ -292,7 +292,7 @@ def __init__( ): super().__init__() - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -306,7 +306,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "skip_prk_steps") and scheduler.config.skip_prk_steps is False: + if scheduler is not None and getattr(scheduler.config, "skip_prk_steps", True) is False: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration" " `skip_prk_steps`. `skip_prk_steps` should be set to True in the configuration file. Please make" @@ -345,8 +345,8 @@ def __init__( "The configuration file of the unet has set the default `sample_size` to smaller than" " 64 which seems highly unlikely .If you're checkpoint is a fine-tuned version of any of the" " following: \n- CompVis/stable-diffusion-v1-4 \n- CompVis/stable-diffusion-v1-3 \n-" - " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- runwayml/stable-diffusion-v1-5" - " \n- runwayml/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the" + " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- stable-diffusion-v1-5/stable-diffusion-v1-5" + " \n- stable-diffusion-v1-5/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the" " configuration file. Please make sure to update the config accordingly as leaving `sample_size=32`" " in the config might lead to incorrect results in future versions. If you have downloaded this" " checkpoint from the Hugging Face Hub, it would be very nice if you could open a Pull request for" diff --git a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py index 52ccd5612776..ce34691eba7c 100644 --- a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +++ b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py @@ -120,8 +120,8 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline, StableDiffusionMixin): [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. - Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details - about a model's potential harms. + Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for + more details about a model's potential harms. feature_extractor ([`~transformers.CLIPImageProcessor`]): A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`. """ diff --git a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py index 6c36ec173539..3c147b64898d 100644 --- a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +++ b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py @@ -172,8 +172,8 @@ class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, StableDiffusionM [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. - Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details - about a model's potential harms. + Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for + more details about a model's potential harms. feature_extractor ([`~transformers.CLIPImageProcessor`]): A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`. """ diff --git a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py index 122701ff923f..664c0810d8cf 100755 --- a/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py @@ -83,7 +83,8 @@ class StableDiffusionKDiffusionPipeline( [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. - Please, refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for details. + Please, refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for + details. feature_extractor ([`CLIPImageProcessor`]): Model that extracts features from generated images to be used as inputs for the `safety_checker`. """ diff --git a/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py b/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py index 81bb0e9a7270..a42c865317a9 100644 --- a/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +++ b/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py @@ -203,8 +203,8 @@ class StableDiffusionLDM3DPipeline( [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. - Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details - about a model's potential harms. + Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for + more details about a model's potential harms. feature_extractor ([`~transformers.CLIPImageProcessor`]): A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`. """ diff --git a/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py b/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py index 2fc79c0610f0..e200a85f4b55 100644 --- a/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +++ b/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py @@ -179,8 +179,8 @@ class StableDiffusionPanoramaPipeline( [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. - Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details - about a model's potential harms. + Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for + more details about a model's potential harms. feature_extractor ([`~transformers.CLIPImageProcessor`]): A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`. """ diff --git a/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py b/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py index cd59cf51869d..dc94ea960c8f 100644 --- a/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +++ b/src/diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py @@ -46,8 +46,8 @@ class StableDiffusionPipelineSafe(DiffusionPipeline, StableDiffusionMixin, IPAda [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. - Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details - about a model's potential harms. + Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for + more details about a model's potential harms. feature_extractor ([`~transformers.CLIPImageProcessor`]): A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`. """ @@ -74,7 +74,7 @@ def __init__( " abuse, brutality, cruelty" ) - if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " @@ -88,7 +88,7 @@ def __init__( new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) - if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." " `clip_sample` should be set to False in the configuration file. Please make sure to update the" @@ -126,8 +126,8 @@ def __init__( "The configuration file of the unet has set the default `sample_size` to smaller than" " 64 which seems highly unlikely .If you're checkpoint is a fine-tuned version of any of the" " following: \n- CompVis/stable-diffusion-v1-4 \n- CompVis/stable-diffusion-v1-3 \n-" - " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- runwayml/stable-diffusion-v1-5" - " \n- runwayml/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the" + " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- stable-diffusion-v1-5/stable-diffusion-v1-5" + " \n- stable-diffusion-v1-5/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the" " configuration file. Please make sure to update the config accordingly as leaving `sample_size=32`" " in the config might lead to incorrect results in future versions. If you have downloaded this" " checkpoint from the Hugging Face Hub, it would be very nice if you could open a Pull request for" diff --git a/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py b/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py index c32052d2e4d0..06d463c98f6b 100644 --- a/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +++ b/src/diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py @@ -47,7 +47,7 @@ >>> from diffusers import StableDiffusionSAGPipeline >>> pipe = StableDiffusionSAGPipeline.from_pretrained( - ... "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16 + ... "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16 ... ) >>> pipe = pipe.to("cuda") @@ -123,8 +123,8 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, StableDiffusionMixin, Textua [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. - Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details - about a model's potential harms. + Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for + more details about a model's potential harms. feature_extractor ([`~transformers.CLIPImageProcessor`]): A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`. """ diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py index 1a938aaf9423..ea7e99dafd51 100644 --- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py @@ -208,7 +208,8 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline, StableDiffusionMixin): [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. - Please, refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for details. + Please, refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for + details. feature_extractor ([`CLIPImageProcessor`]): Model that extracts features from generated images to be used as inputs for the `safety_checker`. """ diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py index 20569d0adb32..b51bedf7ee56 100644 --- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py @@ -248,7 +248,8 @@ class StableDiffusionXLAdapterPipeline( [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. - Please, refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for details. + Please, refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for + details. feature_extractor ([`CLIPImageProcessor`]): Model that extracts features from generated images to be used as inputs for the `safety_checker`. """ diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py index f7f5d86a0888..11fef4f16c90 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py @@ -322,8 +322,8 @@ class TextToVideoZeroPipeline( [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. - Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details - about a model's potential harms. + Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for + more details about a model's potential harms. feature_extractor ([`CLIPImageProcessor`]): A [`CLIPImageProcessor`] to extract features from generated images; used as inputs to the `safety_checker`. """ diff --git a/tests/lora/test_lora_layers_flux.py b/tests/lora/test_lora_layers_flux.py index 9fa968c47107..ace0ad6b6044 100644 --- a/tests/lora/test_lora_layers_flux.py +++ b/tests/lora/test_lora_layers_flux.py @@ -706,7 +706,7 @@ def test_lora_unload_with_parameter_expanded_shapes(self): self.assertTrue(pipe.transformer.config.in_channels == 2 * in_features) self.assertTrue(cap_logger.out.startswith("Expanding the nn.Linear input/output features for module")) - control_pipe.unload_lora_weights() + control_pipe.unload_lora_weights(reset_to_overwritten_params=True) self.assertTrue( control_pipe.transformer.config.in_channels == num_channels_without_control, f"Expected {num_channels_without_control} channels in the modified transformer but has {control_pipe.transformer.config.in_channels=}", @@ -724,6 +724,65 @@ def test_lora_unload_with_parameter_expanded_shapes(self): self.assertTrue(pipe.transformer.x_embedder.weight.data.shape[1] == in_features) self.assertTrue(pipe.transformer.config.in_channels == in_features) + def test_lora_unload_with_parameter_expanded_shapes_and_no_reset(self): + components, _, _ = self.get_dummy_components(FlowMatchEulerDiscreteScheduler) + + logger = logging.get_logger("diffusers.loaders.lora_pipeline") + logger.setLevel(logging.DEBUG) + + # Change the transformer config to mimic a real use case. + num_channels_without_control = 4 + transformer = FluxTransformer2DModel.from_config( + components["transformer"].config, in_channels=num_channels_without_control + ).to(torch_device) + self.assertTrue( + transformer.config.in_channels == num_channels_without_control, + f"Expected {num_channels_without_control} channels in the modified transformer but has {transformer.config.in_channels=}", + ) + + # This should be initialized with a Flux pipeline variant that doesn't accept `control_image`. + components["transformer"] = transformer + pipe = FluxPipeline(**components) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + + _, _, inputs = self.get_dummy_inputs(with_generator=False) + control_image = inputs.pop("control_image") + original_out = pipe(**inputs, generator=torch.manual_seed(0))[0] + + control_pipe = self.pipeline_class(**components) + out_features, in_features = control_pipe.transformer.x_embedder.weight.shape + rank = 4 + + dummy_lora_A = torch.nn.Linear(2 * in_features, rank, bias=False) + dummy_lora_B = torch.nn.Linear(rank, out_features, bias=False) + lora_state_dict = { + "transformer.x_embedder.lora_A.weight": dummy_lora_A.weight, + "transformer.x_embedder.lora_B.weight": dummy_lora_B.weight, + } + with CaptureLogger(logger) as cap_logger: + control_pipe.load_lora_weights(lora_state_dict, "adapter-1") + self.assertTrue(check_if_lora_correctly_set(pipe.transformer), "Lora not correctly set in denoiser") + + inputs["control_image"] = control_image + lora_out = control_pipe(**inputs, generator=torch.manual_seed(0))[0] + + self.assertFalse(np.allclose(original_out, lora_out, rtol=1e-4, atol=1e-4)) + self.assertTrue(pipe.transformer.x_embedder.weight.data.shape[1] == 2 * in_features) + self.assertTrue(pipe.transformer.config.in_channels == 2 * in_features) + self.assertTrue(cap_logger.out.startswith("Expanding the nn.Linear input/output features for module")) + + control_pipe.unload_lora_weights(reset_to_overwritten_params=False) + self.assertTrue( + control_pipe.transformer.config.in_channels == 2 * num_channels_without_control, + f"Expected {num_channels_without_control} channels in the modified transformer but has {control_pipe.transformer.config.in_channels=}", + ) + no_lora_out = control_pipe(**inputs, generator=torch.manual_seed(0))[0] + + self.assertFalse(np.allclose(no_lora_out, lora_out, rtol=1e-4, atol=1e-4)) + self.assertTrue(pipe.transformer.x_embedder.weight.data.shape[1] == in_features * 2) + self.assertTrue(pipe.transformer.config.in_channels == in_features * 2) + @unittest.skip("Not supported in Flux.") def test_simple_inference_with_text_denoiser_block_scale_for_all_dict_options(self): pass diff --git a/tests/lora/test_lora_layers_hunyuanvideo.py b/tests/lora/test_lora_layers_hunyuanvideo.py index 8bda98438571..d2015d8b0711 100644 --- a/tests/lora/test_lora_layers_hunyuanvideo.py +++ b/tests/lora/test_lora_layers_hunyuanvideo.py @@ -12,9 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import gc import sys import unittest +import numpy as np +import pytest import torch from transformers import CLIPTextModel, CLIPTokenizer, LlamaModel, LlamaTokenizerFast @@ -26,7 +29,11 @@ ) from diffusers.utils.testing_utils import ( floats_tensor, + nightly, + numpy_cosine_similarity_distance, + require_big_gpu_with_torch_cuda, require_peft_backend, + require_torch_gpu, skip_mps, ) @@ -182,3 +189,69 @@ def test_simple_inference_with_text_lora_fused(self): @unittest.skip("Text encoder LoRA is not supported in HunyuanVideo.") def test_simple_inference_with_text_lora_save_load(self): pass + + +@nightly +@require_torch_gpu +@require_peft_backend +@require_big_gpu_with_torch_cuda +@pytest.mark.big_gpu_with_torch_cuda +class HunyuanVideoLoRAIntegrationTests(unittest.TestCase): + """internal note: The integration slices were obtained on DGX. + + torch: 2.5.1+cu124 with CUDA 12.5. Need the same setup for the + assertions to pass. + """ + + num_inference_steps = 10 + seed = 0 + + def setUp(self): + super().setUp() + + gc.collect() + torch.cuda.empty_cache() + + model_id = "hunyuanvideo-community/HunyuanVideo" + transformer = HunyuanVideoTransformer3DModel.from_pretrained( + model_id, subfolder="transformer", torch_dtype=torch.bfloat16 + ) + self.pipeline = HunyuanVideoPipeline.from_pretrained( + model_id, transformer=transformer, torch_dtype=torch.float16 + ).to("cuda") + + def tearDown(self): + super().tearDown() + + gc.collect() + torch.cuda.empty_cache() + + def test_original_format_cseti(self): + self.pipeline.load_lora_weights( + "Cseti/HunyuanVideo-LoRA-Arcane_Jinx-v1", weight_name="csetiarcane-nfjinx-v1-6000.safetensors" + ) + self.pipeline.fuse_lora() + self.pipeline.unload_lora_weights() + self.pipeline.vae.enable_tiling() + + prompt = "CSETIARCANE. A cat walks on the grass, realistic" + + out = self.pipeline( + prompt=prompt, + height=320, + width=512, + num_frames=9, + num_inference_steps=self.num_inference_steps, + output_type="np", + generator=torch.manual_seed(self.seed), + ).frames[0] + out = out.flatten() + out_slice = np.concatenate((out[:8], out[-8:])) + + # fmt: off + expected_slice = np.array([0.1013, 0.1924, 0.0078, 0.1021, 0.1929, 0.0078, 0.1023, 0.1919, 0.7402, 0.104, 0.4482, 0.7354, 0.0925, 0.4382, 0.7275, 0.0815]) + # fmt: on + + max_diff = numpy_cosine_similarity_distance(expected_slice.flatten(), out_slice) + + assert max_diff < 1e-3 diff --git a/tests/single_file/test_stable_diffusion_single_file.py b/tests/single_file/test_stable_diffusion_single_file.py index 71afda1b80bb..dd15a5c7c071 100644 --- a/tests/single_file/test_stable_diffusion_single_file.py +++ b/tests/single_file/test_stable_diffusion_single_file.py @@ -4,11 +4,13 @@ import torch -from diffusers import EulerDiscreteScheduler, StableDiffusionPipeline +from diffusers import EulerDiscreteScheduler, StableDiffusionInstructPix2PixPipeline, StableDiffusionPipeline from diffusers.loaders.single_file_utils import _extract_repo_id_and_weights_name +from diffusers.utils import load_image from diffusers.utils.testing_utils import ( backend_empty_cache, enable_full_determinism, + nightly, require_torch_accelerator, slow, torch_device, @@ -118,3 +120,44 @@ def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0 def test_single_file_format_inference_is_same_as_pretrained(self): super().test_single_file_format_inference_is_same_as_pretrained(expected_max_diff=1e-3) + + +@nightly +@slow +@require_torch_accelerator +class StableDiffusionInstructPix2PixPipelineSingleFileSlowTests(unittest.TestCase, SDSingleFileTesterMixin): + pipeline_class = StableDiffusionInstructPix2PixPipeline + ckpt_path = "https://huggingface.co/timbrooks/instruct-pix2pix/blob/main/instruct-pix2pix-00-22000.safetensors" + original_config = ( + "https://raw.githubusercontent.com/timothybrooks/instruct-pix2pix/refs/heads/main/configs/generate.yaml" + ) + repo_id = "timbrooks/instruct-pix2pix" + + def setUp(self): + super().setUp() + gc.collect() + backend_empty_cache(torch_device) + + def tearDown(self): + super().tearDown() + gc.collect() + backend_empty_cache(torch_device) + + def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0): + generator = torch.Generator(device=generator_device).manual_seed(seed) + image = load_image( + "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main/stable_diffusion_pix2pix/example.jpg" + ) + inputs = { + "prompt": "turn him into a cyborg", + "image": image, + "generator": generator, + "num_inference_steps": 3, + "guidance_scale": 7.5, + "image_guidance_scale": 1.0, + "output_type": "np", + } + return inputs + + def test_single_file_format_inference_is_same_as_pretrained(self): + super().test_single_file_format_inference_is_same_as_pretrained(expected_max_diff=1e-3)