From 72b0cce27d350be33524e234ac903f8e8daa2fce Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 11 Dec 2023 10:29:57 +0530 Subject: [PATCH 1/5] feat: introduce autoencoders module --- src/diffusers/models/__init__.py | 20 +++++++------ src/diffusers/models/autoencoders/__init__.py | 5 ++++ .../{ => autoencoders}/autoencoder_asym_kl.py | 8 +++--- .../{ => autoencoders}/autoencoder_kl.py | 12 ++++---- .../autoencoder_kl_temporal_decoder.py | 16 +++++------ .../{ => autoencoders}/autoencoder_tiny.py | 8 +++--- .../consistency_decoder_vae.py | 28 +++++++++---------- .../models/{ => autoencoders}/vae.py | 10 +++---- .../wuerstchen/modeling_paella_vq_model.py | 2 +- 9 files changed, 58 insertions(+), 51 deletions(-) create mode 100644 src/diffusers/models/autoencoders/__init__.py rename src/diffusers/models/{ => autoencoders}/autoencoder_asym_kl.py (97%) rename src/diffusers/models/{ => autoencoders}/autoencoder_kl.py (98%) rename src/diffusers/models/{ => autoencoders}/autoencoder_kl_temporal_decoder.py (97%) rename src/diffusers/models/{ => autoencoders}/autoencoder_tiny.py (98%) rename src/diffusers/models/{ => autoencoders}/consistency_decoder_vae.py (95%) rename src/diffusers/models/{ => autoencoders}/vae.py (99%) diff --git a/src/diffusers/models/__init__.py b/src/diffusers/models/__init__.py index ec1c7ab43494..99a14fed52f3 100644 --- a/src/diffusers/models/__init__.py +++ b/src/diffusers/models/__init__.py @@ -26,11 +26,11 @@ if is_torch_available(): _import_structure["adapter"] = ["MultiAdapter", "T2IAdapter"] - _import_structure["autoencoder_asym_kl"] = ["AsymmetricAutoencoderKL"] - _import_structure["autoencoder_kl"] = ["AutoencoderKL"] _import_structure["autoencoder_kl_temporal_decoder"] = ["AutoencoderKLTemporalDecoder"] - _import_structure["autoencoder_tiny"] = ["AutoencoderTiny"] - _import_structure["consistency_decoder_vae"] = ["ConsistencyDecoderVAE"] + _import_structure["autoencoders.autoencoder_asym_kl"] = ["AsymmetricAutoencoderKL"] + _import_structure["autoencoders.autoencoder_kl"] = ["AutoencoderKL"] + _import_structure["autoencoders.autoencoder_tiny"] = ["AutoencoderTiny"] + _import_structure["autoencoders.consistency_decoder_vae"] = ["ConsistencyDecoderVAE"] _import_structure["controlnet"] = ["ControlNetModel"] _import_structure["controlnetxs"] = ["ControlNetXSModel"] _import_structure["dual_transformer_2d"] = ["DualTransformer2DModel"] @@ -58,11 +58,13 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT: if is_torch_available(): from .adapter import MultiAdapter, T2IAdapter - from .autoencoder_asym_kl import AsymmetricAutoencoderKL - from .autoencoder_kl import AutoencoderKL - from .autoencoder_kl_temporal_decoder import AutoencoderKLTemporalDecoder - from .autoencoder_tiny import AutoencoderTiny - from .consistency_decoder_vae import ConsistencyDecoderVAE + from .autoencoders import ( + AsymmetricAutoencoderKL, + AutoencoderKL, + AutoencoderKLTemporalDecoder, + AutoencoderTiny, + ConsistencyDecoderVAE, + ) from .controlnet import ControlNetModel from .controlnetxs import ControlNetXSModel from .dual_transformer_2d import DualTransformer2DModel diff --git a/src/diffusers/models/autoencoders/__init__.py b/src/diffusers/models/autoencoders/__init__.py new file mode 100644 index 000000000000..201a40ff17b2 --- /dev/null +++ b/src/diffusers/models/autoencoders/__init__.py @@ -0,0 +1,5 @@ +from .autoencoder_asym_kl import AsymmetricAutoencoderKL +from .autoencoder_kl import AutoencoderKL +from .autoencoder_kl_temporal_decoder import AutoencoderKLTemporalDecoder +from .autoencoder_tiny import AutoencoderTiny +from .consistency_decoder_vae import ConsistencyDecoderVAE diff --git a/src/diffusers/models/autoencoder_asym_kl.py b/src/diffusers/models/autoencoders/autoencoder_asym_kl.py similarity index 97% rename from src/diffusers/models/autoencoder_asym_kl.py rename to src/diffusers/models/autoencoders/autoencoder_asym_kl.py index 678e47234096..9114650619fc 100644 --- a/src/diffusers/models/autoencoder_asym_kl.py +++ b/src/diffusers/models/autoencoders/autoencoder_asym_kl.py @@ -16,10 +16,10 @@ import torch import torch.nn as nn -from ..configuration_utils import ConfigMixin, register_to_config -from ..utils.accelerate_utils import apply_forward_hook -from .modeling_outputs import AutoencoderKLOutput -from .modeling_utils import ModelMixin +from ...configuration_utils import ConfigMixin, register_to_config +from ...utils.accelerate_utils import apply_forward_hook +from ..modeling_outputs import AutoencoderKLOutput +from ..modeling_utils import ModelMixin from .vae import DecoderOutput, DiagonalGaussianDistribution, Encoder, MaskConditionDecoder diff --git a/src/diffusers/models/autoencoder_kl.py b/src/diffusers/models/autoencoders/autoencoder_kl.py similarity index 98% rename from src/diffusers/models/autoencoder_kl.py rename to src/diffusers/models/autoencoders/autoencoder_kl.py index 8fa3574125f9..ae2d90c548f8 100644 --- a/src/diffusers/models/autoencoder_kl.py +++ b/src/diffusers/models/autoencoders/autoencoder_kl.py @@ -16,10 +16,10 @@ import torch import torch.nn as nn -from ..configuration_utils import ConfigMixin, register_to_config -from ..loaders import FromOriginalVAEMixin -from ..utils.accelerate_utils import apply_forward_hook -from .attention_processor import ( +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalVAEMixin +from ...utils.accelerate_utils import apply_forward_hook +from ..attention_processor import ( ADDED_KV_ATTENTION_PROCESSORS, CROSS_ATTENTION_PROCESSORS, Attention, @@ -27,8 +27,8 @@ AttnAddedKVProcessor, AttnProcessor, ) -from .modeling_outputs import AutoencoderKLOutput -from .modeling_utils import ModelMixin +from ..modeling_outputs import AutoencoderKLOutput +from ..modeling_utils import ModelMixin from .vae import Decoder, DecoderOutput, DiagonalGaussianDistribution, Encoder diff --git a/src/diffusers/models/autoencoder_kl_temporal_decoder.py b/src/diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py similarity index 97% rename from src/diffusers/models/autoencoder_kl_temporal_decoder.py rename to src/diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py index 176b6e0df924..0b7f8d1f5336 100644 --- a/src/diffusers/models/autoencoder_kl_temporal_decoder.py +++ b/src/diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py @@ -16,14 +16,14 @@ import torch import torch.nn as nn -from ..configuration_utils import ConfigMixin, register_to_config -from ..loaders import FromOriginalVAEMixin -from ..utils import is_torch_version -from ..utils.accelerate_utils import apply_forward_hook -from .attention_processor import CROSS_ATTENTION_PROCESSORS, AttentionProcessor, AttnProcessor -from .modeling_outputs import AutoencoderKLOutput -from .modeling_utils import ModelMixin -from .unet_3d_blocks import MidBlockTemporalDecoder, UpBlockTemporalDecoder +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalVAEMixin +from ...utils import is_torch_version +from ...utils.accelerate_utils import apply_forward_hook +from ..attention_processor import CROSS_ATTENTION_PROCESSORS, AttentionProcessor, AttnProcessor +from ..modeling_outputs import AutoencoderKLOutput +from ..modeling_utils import ModelMixin +from ..unet_3d_blocks import MidBlockTemporalDecoder, UpBlockTemporalDecoder from .vae import DecoderOutput, DiagonalGaussianDistribution, Encoder diff --git a/src/diffusers/models/autoencoder_tiny.py b/src/diffusers/models/autoencoders/autoencoder_tiny.py similarity index 98% rename from src/diffusers/models/autoencoder_tiny.py rename to src/diffusers/models/autoencoders/autoencoder_tiny.py index 56ccf30e0402..08b1c0e74d70 100644 --- a/src/diffusers/models/autoencoder_tiny.py +++ b/src/diffusers/models/autoencoders/autoencoder_tiny.py @@ -18,10 +18,10 @@ import torch -from ..configuration_utils import ConfigMixin, register_to_config -from ..utils import BaseOutput -from ..utils.accelerate_utils import apply_forward_hook -from .modeling_utils import ModelMixin +from ...configuration_utils import ConfigMixin, register_to_config +from ...utils import BaseOutput +from ...utils.accelerate_utils import apply_forward_hook +from ..modeling_utils import ModelMixin from .vae import DecoderOutput, DecoderTiny, EncoderTiny diff --git a/src/diffusers/models/consistency_decoder_vae.py b/src/diffusers/models/autoencoders/consistency_decoder_vae.py similarity index 95% rename from src/diffusers/models/consistency_decoder_vae.py rename to src/diffusers/models/autoencoders/consistency_decoder_vae.py index 34176a35e835..d92423eafc31 100644 --- a/src/diffusers/models/consistency_decoder_vae.py +++ b/src/diffusers/models/autoencoders/consistency_decoder_vae.py @@ -18,20 +18,20 @@ import torch.nn.functional as F from torch import nn -from ..configuration_utils import ConfigMixin, register_to_config -from ..schedulers import ConsistencyDecoderScheduler -from ..utils import BaseOutput -from ..utils.accelerate_utils import apply_forward_hook -from ..utils.torch_utils import randn_tensor -from .attention_processor import ( +from ...configuration_utils import ConfigMixin, register_to_config +from ...schedulers import ConsistencyDecoderScheduler +from ...utils import BaseOutput +from ...utils.accelerate_utils import apply_forward_hook +from ...utils.torch_utils import randn_tensor +from ..attention_processor import ( ADDED_KV_ATTENTION_PROCESSORS, CROSS_ATTENTION_PROCESSORS, AttentionProcessor, AttnAddedKVProcessor, AttnProcessor, ) -from .modeling_utils import ModelMixin -from .unet_2d import UNet2DModel +from ..modeling_utils import ModelMixin +from ..unet_2d import UNet2DModel from .vae import DecoderOutput, DiagonalGaussianDistribution, Encoder @@ -153,7 +153,7 @@ def __init__( self.use_slicing = False self.use_tiling = False - # Copied from diffusers.models.autoencoder_kl.AutoencoderKL.enable_tiling + # Copied from diffusers.models.autoencoders.autoencoder_kl.AutoencoderKL.enable_tiling def enable_tiling(self, use_tiling: bool = True): r""" Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to @@ -162,7 +162,7 @@ def enable_tiling(self, use_tiling: bool = True): """ self.use_tiling = use_tiling - # Copied from diffusers.models.autoencoder_kl.AutoencoderKL.disable_tiling + # Copied from diffusers.models.autoencoders.autoencoder_kl.AutoencoderKL.disable_tiling def disable_tiling(self): r""" Disable tiled VAE decoding. If `enable_tiling` was previously enabled, this method will go back to computing @@ -170,7 +170,7 @@ def disable_tiling(self): """ self.enable_tiling(False) - # Copied from diffusers.models.autoencoder_kl.AutoencoderKL.enable_slicing + # Copied from diffusers.models.autoencoders.autoencoder_kl.AutoencoderKL.enable_slicing def enable_slicing(self): r""" Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to @@ -178,7 +178,7 @@ def enable_slicing(self): """ self.use_slicing = True - # Copied from diffusers.models.autoencoder_kl.AutoencoderKL.disable_slicing + # Copied from diffusers.models.autoencoders.autoencoder_kl.AutoencoderKL.disable_slicing def disable_slicing(self): r""" Disable sliced VAE decoding. If `enable_slicing` was previously enabled, this method will go back to computing @@ -333,14 +333,14 @@ def decode( return DecoderOutput(sample=x_0) - # Copied from diffusers.models.autoencoder_kl.AutoencoderKL.blend_v + # Copied from diffusers.models.autoencoders.autoencoder_kl.AutoencoderKL.blend_v def blend_v(self, a: torch.Tensor, b: torch.Tensor, blend_extent: int) -> torch.Tensor: blend_extent = min(a.shape[2], b.shape[2], blend_extent) for y in range(blend_extent): b[:, :, y, :] = a[:, :, -blend_extent + y, :] * (1 - y / blend_extent) + b[:, :, y, :] * (y / blend_extent) return b - # Copied from diffusers.models.autoencoder_kl.AutoencoderKL.blend_h + # Copied from diffusers.models.autoencoders.autoencoder_kl.AutoencoderKL.blend_h def blend_h(self, a: torch.Tensor, b: torch.Tensor, blend_extent: int) -> torch.Tensor: blend_extent = min(a.shape[3], b.shape[3], blend_extent) for x in range(blend_extent): diff --git a/src/diffusers/models/vae.py b/src/diffusers/models/autoencoders/vae.py similarity index 99% rename from src/diffusers/models/vae.py rename to src/diffusers/models/autoencoders/vae.py index 0049456e2187..9ed0232e6983 100644 --- a/src/diffusers/models/vae.py +++ b/src/diffusers/models/autoencoders/vae.py @@ -18,11 +18,11 @@ import torch import torch.nn as nn -from ..utils import BaseOutput, is_torch_version -from ..utils.torch_utils import randn_tensor -from .activations import get_activation -from .attention_processor import SpatialNorm -from .unet_2d_blocks import ( +from ...utils import BaseOutput, is_torch_version +from ...utils.torch_utils import randn_tensor +from ..activations import get_activation +from ..attention_processor import SpatialNorm +from ..unet_2d_blocks import ( AutoencoderTinyBlock, UNetMidBlock2D, get_down_block, diff --git a/src/diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py b/src/diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py index 7ee42faa0e82..3115cc2d9d3d 100644 --- a/src/diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +++ b/src/diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py @@ -19,8 +19,8 @@ import torch.nn as nn from ...configuration_utils import ConfigMixin, register_to_config +from ...models.autoencoders.vae import DecoderOutput, VectorQuantizer from ...models.modeling_utils import ModelMixin -from ...models.vae import DecoderOutput, VectorQuantizer from ...models.vq_model import VQEncoderOutput from ...utils.accelerate_utils import apply_forward_hook From ce20aa1e3cd228df6f6c2c4bfed64d99171ec467 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 11 Dec 2023 10:30:48 +0530 Subject: [PATCH 2/5] more changes for styling and copy fixing --- scripts/convert_consistency_decoder.py | 2 +- src/diffusers/models/vq_model.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/convert_consistency_decoder.py b/scripts/convert_consistency_decoder.py index 6a294038a5a3..3319f4c4665e 100644 --- a/scripts/convert_consistency_decoder.py +++ b/scripts/convert_consistency_decoder.py @@ -12,9 +12,9 @@ from tqdm import tqdm from diffusers import AutoencoderKL, ConsistencyDecoderVAE, DiffusionPipeline, StableDiffusionPipeline, UNet2DModel +from diffusers.models.autoencoders.vae import Encoder from diffusers.models.embeddings import TimestepEmbedding from diffusers.models.unet_2d_blocks import ResnetDownsampleBlock2D, ResnetUpsampleBlock2D, UNetMidBlock2D -from diffusers.models.vae import Encoder args = ArgumentParser() diff --git a/src/diffusers/models/vq_model.py b/src/diffusers/models/vq_model.py index f4a6c8fb227f..bfe62ec863b3 100644 --- a/src/diffusers/models/vq_model.py +++ b/src/diffusers/models/vq_model.py @@ -20,8 +20,8 @@ from ..configuration_utils import ConfigMixin, register_to_config from ..utils import BaseOutput from ..utils.accelerate_utils import apply_forward_hook +from .autoencoders.vae import Decoder, DecoderOutput, Encoder, VectorQuantizer from .modeling_utils import ModelMixin -from .vae import Decoder, DecoderOutput, Encoder, VectorQuantizer @dataclass From 43ca493f8834e0ab940eb981409bfc91f01848f4 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 11 Dec 2023 10:33:12 +0530 Subject: [PATCH 3/5] path changes in the docs. --- docs/source/en/api/models/asymmetricautoencoderkl.md | 6 +++--- docs/source/en/api/models/autoencoder_tiny.md | 2 +- docs/source/en/api/models/autoencoderkl.md | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/source/en/api/models/asymmetricautoencoderkl.md b/docs/source/en/api/models/asymmetricautoencoderkl.md index 1e102943c5e4..fdc71df7a999 100644 --- a/docs/source/en/api/models/asymmetricautoencoderkl.md +++ b/docs/source/en/api/models/asymmetricautoencoderkl.md @@ -49,12 +49,12 @@ make_image_grid([original_image, mask_image, image], rows=1, cols=3) ## AsymmetricAutoencoderKL -[[autodoc]] models.autoencoder_asym_kl.AsymmetricAutoencoderKL +[[autodoc]] models.autoencoders.autoencoder_asym_kl.AsymmetricAutoencoderKL ## AutoencoderKLOutput -[[autodoc]] models.autoencoder_kl.AutoencoderKLOutput +[[autodoc]] models.autoencoders.autoencoder_kl.AutoencoderKLOutput ## DecoderOutput -[[autodoc]] models.vae.DecoderOutput +[[autodoc]] models.autoencoders.vae.DecoderOutput diff --git a/docs/source/en/api/models/autoencoder_tiny.md b/docs/source/en/api/models/autoencoder_tiny.md index 1d19539bffe8..b5c9dc638e6f 100644 --- a/docs/source/en/api/models/autoencoder_tiny.md +++ b/docs/source/en/api/models/autoencoder_tiny.md @@ -54,4 +54,4 @@ image ## AutoencoderTinyOutput -[[autodoc]] models.autoencoder_tiny.AutoencoderTinyOutput +[[autodoc]] models.autoencoders.autoencoder_tiny.AutoencoderTinyOutput diff --git a/docs/source/en/api/models/autoencoderkl.md b/docs/source/en/api/models/autoencoderkl.md index f42a4d2941dd..72427ab30e6a 100644 --- a/docs/source/en/api/models/autoencoderkl.md +++ b/docs/source/en/api/models/autoencoderkl.md @@ -36,11 +36,11 @@ model = AutoencoderKL.from_single_file(url) ## AutoencoderKLOutput -[[autodoc]] models.autoencoder_kl.AutoencoderKLOutput +[[autodoc]] models.autoencoders.autoencoder_kl.AutoencoderKLOutput ## DecoderOutput -[[autodoc]] models.vae.DecoderOutput +[[autodoc]] models.autoencoders.vae.DecoderOutput ## FlaxAutoencoderKL From 45e61d69781e0703420c64dd4726295f1d512ca5 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 11 Dec 2023 10:39:09 +0530 Subject: [PATCH 4/5] fix: import structure in init. --- src/diffusers/models/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/models/__init__.py b/src/diffusers/models/__init__.py index 99a14fed52f3..7487bbf2f98e 100644 --- a/src/diffusers/models/__init__.py +++ b/src/diffusers/models/__init__.py @@ -26,9 +26,9 @@ if is_torch_available(): _import_structure["adapter"] = ["MultiAdapter", "T2IAdapter"] - _import_structure["autoencoder_kl_temporal_decoder"] = ["AutoencoderKLTemporalDecoder"] _import_structure["autoencoders.autoencoder_asym_kl"] = ["AsymmetricAutoencoderKL"] _import_structure["autoencoders.autoencoder_kl"] = ["AutoencoderKL"] + _import_structure["autoencoders.autoencoder_kl_temporal_decoder"] = ["AutoencoderKLTemporalDecoder"] _import_structure["autoencoders.autoencoder_tiny"] = ["AutoencoderTiny"] _import_structure["autoencoders.consistency_decoder_vae"] = ["ConsistencyDecoderVAE"] _import_structure["controlnet"] = ["ControlNetModel"] From 6f489532adf875cadedfda3d986a3e5d1e6358c0 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 11 Dec 2023 10:44:22 +0530 Subject: [PATCH 5/5] fix controlnetxs import --- src/diffusers/models/controlnetxs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/models/controlnetxs.py b/src/diffusers/models/controlnetxs.py index 41f2d8af01b1..3cc77fe70d72 100644 --- a/src/diffusers/models/controlnetxs.py +++ b/src/diffusers/models/controlnetxs.py @@ -26,7 +26,7 @@ from .attention_processor import ( AttentionProcessor, ) -from .autoencoder_kl import AutoencoderKL +from .autoencoders import AutoencoderKL from .lora import LoRACompatibleConv from .modeling_utils import ModelMixin from .unet_2d_blocks import (