From 7ea8bd4e928910b195eaf12b7033e5a2e32b15a3 Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Tue, 8 Oct 2024 07:44:44 +0000 Subject: [PATCH] Save debug tensors for all fused ops; compare o_proj input in alignment test --- src/ops/fused.cu | 7 +------ tests/inference/inference_alignment_test.py | 2 +- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/ops/fused.cu b/src/ops/fused.cu index 76bfa89def..cc681a8352 100644 --- a/src/ops/fused.cu +++ b/src/ops/fused.cu @@ -645,12 +645,7 @@ __host__ void assert(false && "Fusion currently does not support type"); } } - if (metas->meta[op]->inference_debugging && - !(fused->op_op_type[op] == OP_ALLREDUCE || - fused->op_op_type[op] == OP_PARALLEL_IDENTITY || - fused->op_op_type[op] == OP_REPLICATE || - fused->op_op_type[op] == OP_REPARTITION || - fused->op_op_type[op] == OP_COMBINE)) { + if (metas->meta[op]->inference_debugging ) { std::vector input_accessors_to_save; std::vector weight_accessors_to_save; std::vector output_accessors_to_save; diff --git a/tests/inference/inference_alignment_test.py b/tests/inference/inference_alignment_test.py index 0b8aa75e3e..ee910eafa8 100644 --- a/tests/inference/inference_alignment_test.py +++ b/tests/inference/inference_alignment_test.py @@ -584,7 +584,7 @@ def compare(hf_tensor, ff_tensor, label="", additional_ff_tensor=None, tolerance hf_tensor_name = f"layers.{i}.self_attn.out_proj" ff_tensor_name = convert_hf_filename_to_ff(hf_tensor_name.replace(".out_proj", ".o_proj")) # the raw attention result, w/o o_proj. 
This is the output of senf_attn of FF and the input of o_proj in HF - output_comparison = TensorComparisonIdxs(hf_tensor_type="input", ff_tensor_type="output", hf_tensor_idx=0, ff_tensor_idx=0) + output_comparison = TensorComparisonIdxs(hf_tensor_type="input", ff_tensor_type="input", hf_tensor_idx=0, ff_tensor_idx=0) hf_tensor = get_hf_tensor(hf_tensor_name, output_comparison) # ff_tensor = get_ff_tensor(ff_tensor_name, output_comparison, hf_tensor.shape, tp_type=TPType.TO_REDUCE) # TP for self-attn partitions the attention heads across TP workers