diff --git a/src/diffusers/models/transformers/transformer_hunyuan_video.py b/src/diffusers/models/transformers/transformer_hunyuan_video.py
index 846104718b9a..b5ff734eee25 100644
--- a/src/diffusers/models/transformers/transformer_hunyuan_video.py
+++ b/src/diffusers/models/transformers/transformer_hunyuan_video.py
@@ -721,7 +721,8 @@ def forward(
         for i in range(batch_size):
             attention_mask[i, : effective_sequence_length[i]] = True
 
-        attention_mask = attention_mask.unsqueeze(1)  # [B, 1, N], for broadcasting across attention heads
+        # [B, 1, 1, N], for broadcasting across attention heads
+        attention_mask = attention_mask.unsqueeze(1).unsqueeze(1)
 
         # 4. Transformer blocks
         if torch.is_grad_enabled() and self.gradient_checkpointing:
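
Note (not part of the patch): PyTorch's `torch.nn.functional.scaled_dot_product_attention` broadcasts `attn_mask` against the `[B, H, L, S]` attention scores from the rightmost dimension, so a 3-D `[B, 1, N]` mask would line its batch dimension up with the heads dimension, whereas the 4-D `[B, 1, 1, N]` shape broadcasts cleanly over both heads and query positions. A minimal sketch of the broadcast with made-up shapes (all names and sizes below are illustrative assumptions, not taken from the model):

```python
import torch
import torch.nn.functional as F

# Hypothetical sizes for illustration only: batch, heads, sequence length, head dim.
B, H, N, D = 2, 4, 16, 8
effective_sequence_length = torch.tensor([10, 16])  # assumed per-sample valid lengths

# Build the boolean padding mask the same way the patched forward() does.
attention_mask = torch.zeros(B, N, dtype=torch.bool)
for i in range(B):
    attention_mask[i, : effective_sequence_length[i]] = True

# [B, 1, 1, N]: the two singleton dims broadcast over heads and query positions.
attention_mask = attention_mask.unsqueeze(1).unsqueeze(1)

q = k = v = torch.randn(B, H, N, D)
out = F.scaled_dot_product_attention(q, k, v, attn_mask=attention_mask)
print(out.shape)  # torch.Size([2, 4, 16, 8])
```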