Commit
enable flash and mem attention modules by default
thayeral committed Jan 13, 2025
1 parent c19fc43 commit e06a44f
Showing 1 changed file with 1 addition and 4 deletions.
src/transformer.py (1 addition, 4 deletions)

```diff
@@ -48,10 +48,7 @@ def forward(self, x, return_attention=False):
         k = self.k_norm(k)
 
         try:
-            if return_attention:
-                raise NotImplementedError
-
-            with torch.backends.cuda.sdp_kernel():
+            with torch.backends.cuda.sdp_kernel(enable_flash=True, enable_mem_efficient=True, enable_math=False):
                 x = F.scaled_dot_product_attention(
                     q, k, v,
                     dropout_p=self.att_drop.p if self.training else 0.,
```
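
For context, here is a minimal standalone sketch of what the new kernel selection does: it restricts `F.scaled_dot_product_attention` to the fused flash and memory-efficient CUDA backends, exactly as the added line does. The tensor shapes, dtype handling, and the explicit fallback to default backend selection are illustrative assumptions, not taken from src/transformer.py (whose `except` clause lies outside the shown hunk).

```python
import torch
import torch.nn.functional as F

device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

# Hypothetical shapes: (batch, heads, seq_len, head_dim); not from src/transformer.py.
q = torch.randn(2, 8, 128, 64, device=device, dtype=dtype)
k = torch.randn(2, 8, 128, 64, device=device, dtype=dtype)
v = torch.randn(2, 8, 128, 64, device=device, dtype=dtype)

try:
    # Restrict the CUDA backend to the fused flash / memory-efficient kernels,
    # as in the commit; the plain math kernel is disabled inside this block.
    with torch.backends.cuda.sdp_kernel(enable_flash=True,
                                        enable_mem_efficient=True,
                                        enable_math=False):
        out = F.scaled_dot_product_attention(q, k, v, dropout_p=0.0)
except RuntimeError:
    # If neither fused kernel supports these inputs, fall back to the default
    # backend selection (an assumed fallback; the diff's except clause is not shown).
    out = F.scaled_dot_product_attention(q, k, v, dropout_p=0.0)

print(out.shape)  # torch.Size([2, 8, 128, 64])
```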
