
Commit c805f6b

Commit message: fa
1 parent f08dc92 commit c805f6b

File tree: 1 file changed, +22 −3 lines changed

src/diffusers/models/attention_processor.py

Lines changed: 22 additions & 3 deletions
@@ -24,6 +24,7 @@
 from ..utils.import_utils import is_torch_npu_available, is_xformers_available
 from ..utils.torch_utils import is_torch_version, maybe_allow_in_graph
 
+from torch_xla.experimental.custom_kernel import flash_attention
 
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
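The hunk above adds torch_xla's flash_attention as an unconditional module-level import, right next to the availability helpers pulled in from ..utils.import_utils. As a hedged sketch only (not part of this commit), a guarded import in the same spirit could use a plain try/except so the module still loads when torch_xla is not installed:

try:
    from torch_xla.experimental.custom_kernel import flash_attention  # requires torch_xla
except ImportError:
    # torch_xla not installed / not an XLA environment
    flash_attention = None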
@@ -2364,9 +2365,27 @@ def __call__(
 
         # the output of sdp = (batch, num_heads, seq_len, head_dim)
         # TODO: add support for attn.scale when we move to Torch 2.1
-        hidden_states = F.scaled_dot_product_attention(
-            query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False
-        )
+
+        # print("piz_test6")
+        # logger.warning(f"piz_debug2: query shape: {query.shape}, key shape: {key.shape}, value shape: {value.shape}")
+        hidden_states = flash_attention(query, key, value, causal=False)
+
+        # Apply the attention mask if provided
+        if attention_mask is not None:
+            # Reshape hidden_states to (batch_size, num_heads, seq_len, head_dim)
+            hidden_states = hidden_states.view(batch_size, attn.heads, -1, head_dim)
+
+            # Ensure attention_mask is the correct shape (batch_size, 1, 1, seq_len)
+            attention_mask = attention_mask.view(batch_size, 1, 1, -1)
+
+            # Expand attention_mask to match hidden_states shape
+            attention_mask = attention_mask.expand(-1, attn.heads, hidden_states.size(2), -1)
+
+            # Apply the mask
+            hidden_states = hidden_states * attention_mask.to(hidden_states.dtype)
+        # hidden_states = F.scaled_dot_product_attention(
+        #     query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False
+        # )
 
         hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim)
         hidden_states = hidden_states.to(query.dtype)
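The second hunk swaps F.scaled_dot_product_attention for torch_xla's Pallas flash-attention kernel and applies the attention mask to the kernel's output rather than passing it into the softmax. Below is a minimal, self-contained sketch of that pattern, assuming a TPU/XLA runtime with torch_xla installed; the tensor sizes and the simplified (batch_size, 1, seq_len, 1) mask broadcast are illustrative choices, not code from this commit.

# Minimal sketch (not the diffusers processor itself): run torch_xla's
# flash_attention kernel in place of F.scaled_dot_product_attention,
# then zero out masked positions in its output.
import torch
import torch_xla.core.xla_model as xm
from torch_xla.experimental.custom_kernel import flash_attention

device = xm.xla_device()  # assumes a TPU/XLA device is available

batch_size, num_heads, seq_len, head_dim = 2, 8, 1024, 128
query = torch.randn(batch_size, num_heads, seq_len, head_dim, device=device)
key = torch.randn(batch_size, num_heads, seq_len, head_dim, device=device)
value = torch.randn(batch_size, num_heads, seq_len, head_dim, device=device)

# (batch_size, seq_len) keep/drop mask: 1 = keep the token, 0 = mask it out.
attention_mask = torch.ones(batch_size, seq_len, device=device)

# Full (non-causal) attention, as in the diff; the kernel takes and returns
# tensors in (batch, num_heads, seq_len, head_dim) layout.
hidden_states = flash_attention(query, key, value, causal=False)

if attention_mask is not None:
    # Broadcast the per-token mask over heads and head_dim, zeroing the
    # output rows of masked positions after attention has been computed.
    mask = attention_mask.view(batch_size, 1, seq_len, 1)
    hidden_states = hidden_states * mask.to(hidden_states.dtype)

# Back to (batch, seq_len, num_heads * head_dim), as in the processor.
hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, num_heads * head_dim)

Note that this multiplicative masking zeroes rows of the attention output, whereas the attn_mask argument of F.scaled_dot_product_attention biases the softmax itself before the weighted sum is taken.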
