Commit

Use threads_per_warp=16 for 06-fused-attention.py (#1146)
Enable DPAS (Intel's matrix-engine instruction) in the 06 tutorial by launching the attention kernels with threads_per_warp=16.
chengjunlu authored May 17, 2024
1 parent 5e2256f commit b8c32f0
Showing 1 changed file with 3 additions and 1 deletion.
python/tutorials/06-fused-attention.py (3 additions, 1 deletion)
@@ -468,6 +468,7 @@ def forward(ctx, q, k, v, causal, sm_scale):
             N_CTX=q.shape[2],  #
             HEAD_DIM=HEAD_DIM_K,  #
             STAGE=stage,  #
+            threads_per_warp=16,  #
             **extra_kern_args)
 
         ctx.save_for_backward(q, k, v, o, M)
@@ -514,7 +515,8 @@ def backward(ctx, do):
             BLK_SLICE_FACTOR=BLK_SLICE_FACTOR,  #
             HEAD_DIM=ctx.HEAD_DIM,  #
             num_warps=NUM_WARPS,  #
-            num_stages=NUM_STAGES  #
+            num_stages=NUM_STAGES,  #
+            threads_per_warp=16  #
         )
 
         return dq, dk, dv, None, None
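For context, threads_per_warp is passed straight through the kernel launch as a keyword argument, alongside num_warps and num_stages, rather than being set inside the kernel body. Below is a minimal sketch of the same launch pattern on a standalone kernel, assuming an Intel XPU build of Triton that accepts threads_per_warp at launch time (as the diff above does); the "xpu" device string, tensor sizes, and kernel itself are illustrative, not part of the commit:

import torch
import triton
import triton.language as tl


@triton.jit
def scale_kernel(x_ptr, out_ptr, n_elements, scale, BLOCK_SIZE: tl.constexpr):
    # Each program instance handles one BLOCK_SIZE-wide slice of the input.
    pid = tl.program_id(axis=0)
    offsets = pid * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
    mask = offsets < n_elements
    x = tl.load(x_ptr + offsets, mask=mask)
    tl.store(out_ptr + offsets, x * scale, mask=mask)


x = torch.randn(4096, device="xpu")  # assumes an Intel XPU device is available
out = torch.empty_like(x)
grid = (triton.cdiv(x.numel(), 1024),)
# threads_per_warp=16 requests the 16-lane sub-group size, the same setting
# the commit adds to the fused-attention launches so DPAS codegen can kick in.
scale_kernel[grid](x, out, x.numel(), 2.0,
                   BLOCK_SIZE=1024,
                   num_warps=4,
                   threads_per_warp=16)

The design choice mirrors the diff: the change is purely a launch-time configuration, so the kernel source in the tutorial stays untouched and only the call sites in forward and backward gain the extra keyword.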
