@@ -2374,15 +2374,20 @@ def __call__(
         if attention_mask is not None:
             # Reshape hidden_states to (batch_size, num_heads, seq_len, head_dim)
             hidden_states = hidden_states.view(batch_size, attn.heads, -1, head_dim)
-
+
             # Ensure attention_mask is the correct shape (batch_size, 1, 1, seq_len)
-            attention_mask = attention_mask.view(batch_size, 1, 1, -1)
+            if attention_mask.shape != (batch_size, 1, 1, hidden_states.size(-2)):
+                attention_mask = attention_mask.view(batch_size, 1, 1, -1)
+                attention_mask = F.pad(attention_mask, (0, hidden_states.size(-2) - attention_mask.size(-1)), value=1)

             # Expand attention_mask to match hidden_states shape
             attention_mask = attention_mask.expand(-1, attn.heads, hidden_states.size(2), -1)
-
+
             # Apply the mask
             hidden_states = hidden_states * attention_mask.to(hidden_states.dtype)
+
+            # Reshape hidden_states back to its original shape
+            hidden_states = hidden_states.view(batch_size, -1, attn.heads * head_dim)
             # hidden_states = F.scaled_dot_product_attention(
             #     query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False
             # )
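
Below is a minimal standalone sketch (not part of the diff) of how the new reshape-and-pad branch behaves; the toy shapes are hypothetical and chosen only so that F.pad has to extend the mask, which it fills with 1s (i.e., padded positions stay unmasked):

import torch
import torch.nn.functional as F

# Hypothetical toy shapes, not taken from the PR, chosen only to trace the new branch.
batch_size, heads, head_dim = 2, 4, 8
seq_len, mask_len = 6, 5  # mask shorter than the sequence, so the F.pad path fires

hidden_states = torch.randn(batch_size, seq_len, heads * head_dim)
attention_mask = torch.ones(batch_size, mask_len)

# Same reshape as in the diff: (batch_size, heads, seq_len, head_dim)
hidden_states = hidden_states.view(batch_size, heads, -1, head_dim)

# New branch from the diff: only reshape/pad when the mask is not already
# (batch_size, 1, 1, seq_len); padded positions are filled with value=1.
if attention_mask.shape != (batch_size, 1, 1, hidden_states.size(-2)):
    attention_mask = attention_mask.view(batch_size, 1, 1, -1)
    attention_mask = F.pad(
        attention_mask, (0, hidden_states.size(-2) - attention_mask.size(-1)), value=1
    )

print(attention_mask.shape)  # torch.Size([2, 1, 1, 6])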