Skip to content

Commit

Permalink
apply @deltheil suggestions
Browse files Browse the repository at this point in the history
  • Loading branch information
Laurent2916 committed Feb 2, 2024
1 parent b49e438 commit 0a47f5c
Show file tree
Hide file tree
Showing 6 changed files with 19 additions and 12 deletions.
2 changes: 1 addition & 1 deletion src/refiners/foundationals/clip/text_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ class CLIPTextEncoderL(CLIPTextEncoder):
Note:
We replace the GeLU activation function with an approximate GeLU to comply with the original CLIP implementation
of OpenAI (https://github.com/openai/CLIP/blob/main/clip/model.py#L166)
of OpenAI (https://github.com/openai/CLIP/blob/a1d0717/clip/model.py#L166)
See [[arXiv:2103.00020] Learning Transferable Visual Models From Natural Language Supervision](https://arxiv.org/abs/2103.00020)
for more details.
Expand Down
3 changes: 0 additions & 3 deletions src/refiners/foundationals/latent_diffusion/auto_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,8 +238,6 @@ def decode(self, x: Tensor) -> Tensor:
x = decoder(x / self.encoder_scale)
return x

# backward-compatibility alias
# TODO: deprecate this method
def image_to_latents(self, image: Image.Image) -> Tensor:
return self.images_to_latents([image])

Expand All @@ -261,7 +259,6 @@ def images_to_latents(self, images: list[Image.Image]) -> Tensor:
def decode_latents(self, x: Tensor) -> Image.Image:
return self.latents_to_image(x)

# TODO: deprecated this method ?
def latents_to_image(self, x: Tensor) -> Image.Image:
if x.shape[0] != 1:
raise ValueError(f"Expected batch size of 1, got {x.shape[0]}")
Expand Down
10 changes: 7 additions & 3 deletions src/refiners/foundationals/latent_diffusion/solvers/solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,10 @@ def add_noise(
def remove_noise(self, x: Tensor, noise: Tensor, step: int) -> Tensor:
"""Remove noise from the input tensor using the current step of the diffusion process.
Note:
See [[arXiv:2006.11239] Denoising Diffusion Probabilistic Models, Equation 15](https://arxiv.org/abs/2006.11239)
and [[arXiv:2210.00939] Improving Sample Quality of Diffusion Models Using Self-Attention Guidance](https://arxiv.org/abs/2210.00939).
Args:
x: The input tensor to remove noise from.
noise: The noise tensor to remove from the input tensor.
Expand All @@ -132,9 +136,6 @@ def remove_noise(self, x: Tensor, noise: Tensor, step: int) -> Tensor:
timestep = self.timesteps[step]
cumulative_scale_factors = self.cumulative_scale_factors[timestep]
noise_stds = self.noise_std[timestep]
# See equation (15) from https://arxiv.org/pdf/2006.11239.pdf.
# Useful to preview progress or for guidance
# See also https://arxiv.org/pdf/2210.00939.pdf (self-attention guidance)
denoised_x = (x - noise_stds * noise) / cumulative_scale_factors
return denoised_x

Expand Down Expand Up @@ -196,6 +197,9 @@ def scale_model_input(self, x: Tensor, step: int) -> Tensor:
This method should only be overridden by solvers that
need to scale the input according to the current timestep.
By default, this method does not scale the input.
(scale=1)
Args:
x: The input tensor to scale.
step: The current step of the diffusion process.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ def set_unet_context(self, *, timestep: Tensor, clip_text_embedding: Tensor, **_
def set_self_attention_guidance(self, enable: bool, scale: float = 1.0) -> None:
"""Set whether to enable self-attention guidance.
See [[arXiv:2210.00939] Improving Sample Quality of Diffusion Models Using Self-Attention Guidance](https://arxiv.org/abs/2210.00939)
for more details.
Args:
enable: Whether to enable self-attention guidance.
scale: The scale to use.
Expand All @@ -114,7 +117,7 @@ def has_self_attention_guidance(self) -> bool:
return self._find_sag_adapter() is not None

def _find_sag_adapter(self) -> SD1SAGAdapter | None:
"""Finds the self-attention guidance adapter."""
"""Finds the self-attention guidance adapter, if any."""
for p in self.unet.get_parents():
if isinstance(p, SD1SAGAdapter):
return p
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,9 @@ def forward(
def set_self_attention_guidance(self, enable: bool, scale: float = 1.0) -> None:
"""Sets the self-attention guidance.
See [[arXiv:2210.00939] Improving Sample Quality of Diffusion Models Using Self-Attention Guidance](https://arxiv.org/abs/2210.00939)
for more details.
Args:
enable: Whether to enable self-attention guidance or not.
scale: The scale to use.
Expand All @@ -158,7 +161,7 @@ def has_self_attention_guidance(self) -> bool:
return self._find_sag_adapter() is not None

def _find_sag_adapter(self) -> SDXLSAGAdapter | None:
"""Finds the self-attention guidance adapter."""
"""Finds the self-attention guidance adapter, if any."""
for p in self.unet.get_parents():
if isinstance(p, SDXLSAGAdapter):
return p
Expand Down
6 changes: 3 additions & 3 deletions src/refiners/foundationals/segment_anything/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,10 +154,10 @@ def image_size(self) -> int:
return w

def compute_target_size(self, size: tuple[int, int]) -> tuple[int, int]:
"""Compute the target size for a given size.
"""Compute the target size as expected by the image encoder.
Args:
size: The size of the image.
size: The size of the input image.
Returns:
The target size as a (height, width) tuple.
Expand All @@ -171,7 +171,7 @@ def compute_target_size(self, size: tuple[int, int]) -> tuple[int, int]:
return (newh, neww)

def preprocess_image(self, image: Image.Image, target_size: tuple[int, int]) -> Tensor:
"""Preprocess an image.
"""Preprocess an image without distorting its aspect ratio.
Args:
image: The image to preprocess.
Expand Down

0 comments on commit 0a47f5c

Please sign in to comment.