
Commit

fix
goliaro committed Nov 8, 2024
1 parent b56ebd3 commit 3632754
Showing 3 changed files with 10 additions and 56 deletions.
57 changes: 5 additions & 52 deletions src/runtime/model.cc
@@ -3420,63 +3420,16 @@ bool FFModel::need_to_add_combine(int layer_idx) const {
 
 bool FFModel::need_to_add_allreduce(int layer_idx) const {
   auto const &l = layers[layer_idx];
-  if (config.computationMode == COMP_MODE_INFERENCE &&
-      config.tensor_parallelism_degree > 1 &&
-      (
-          // l->op_type == OP_INC_MULTIHEAD_SELF_ATTENTION ||
-          // l->op_type == OP_TREE_INC_MULTIHEAD_SELF_ATTENTION ||
-          (std::string(l->name).find("attn.o_proj") != std::string::npos) ||
-          // mlp layer
-          is_mlp_block(layer_idx) ||
-          // llama mlp layer
-          (l->op_type == OP_LINEAR && layer_idx >= 2 &&
-           layers[layer_idx - 1]->op_type == OP_GELU &&
-           layers[layer_idx - 2]->op_type == OP_LINEAR) ||
-          // LLAMA without element-wise operator fusion
-          (l->op_type == OP_LINEAR && layer_idx >= 5 &&
-           layers[layer_idx - 1]->op_type == OP_EW_MUL &&
-           layers[layer_idx - 2]->op_type == OP_EW_MUL &&
-           layers[layer_idx - 3]->op_type == OP_SIGMOID &&
-           layers[layer_idx - 4]->op_type == OP_LINEAR &&
-           layers[layer_idx - 5]->op_type == OP_LINEAR) ||
-          // LLAMA with element-wise operator fusion
-          (l->op_type == OP_LINEAR && layer_idx >= 3 &&
-           layers[layer_idx - 1]->op_type == OP_SIGMOID_SILU_MULTI &&
-           layers[layer_idx - 2]->op_type == OP_LINEAR &&
-           layers[layer_idx - 3]->op_type == OP_LINEAR))) {
+  if (config.computationMode == COMP_MODE_INFERENCE && config.tensor_parallelism_degree > 1 &&
+      ((l->op_type == OP_LINEAR && std::string(l->name).find("attn.o_proj") != std::string::npos) ||
+       is_mlp_block(layer_idx) ||
+       (l->op_type == OP_LINEAR && std::string(l->name).find("mlp.down_proj") != std::string::npos)
+      )) {
     return true;
   }
   return false;
 }
 
-#ifdef DEADCODE
-bool FFModel::need_to_add_parallel_identity(int layer_idx) const {
-  auto const &l = layers[layer_idx];
-  // add parallel identity (allreduce in the backward pass) before the lm head
-  // we find the lm head by looking for the linear layer right after a residual
-  // rms norm / layer norm, and before a softmax, followed by
-  // argmax/argtopk/sampling
-  if (config.computationMode == COMP_MODE_INFERENCE &&
-      config.tensor_parallelism_degree > 1 &&
-      ((l->op_type == OP_RESIDUAL_RMS_NORM ||
-        l->op_type == OP_RESIDUAL_LAYERNORM) &&
-       // there are at least 2 layers before the norm, and at least 3 following
-       // the norm
-       layer_idx >= 2 && layer_idx < layers.size() - 3 &&
-       // norm is followed by linear layer (lm head)
-       layers[layer_idx + 1]->op_type == OP_LINEAR &&
-       // lm head is followed by softmax
-       layers[layer_idx + 2]->op_type == OP_SOFTMAX &&
-       // softmax is followed by argmax/argtopk/sampling
-       (layers[layer_idx + 3]->op_type == OP_ARG_TOPK ||
-        layers[layer_idx + 3]->op_type == OP_SAMPLING ||
-        layers[layer_idx + 3]->op_type == OP_ARGMAX ||
-        layers[layer_idx + 3]->op_type == OP_SCALAR_TRUE_DIV))) {
-    return true;
-  }
-  return false;
-}
-#endif
 bool FFModel::need_to_add_parallel_identity(int layer_idx) const {
   auto const &l = layers[layer_idx];
   // add parallel identity (allreduce in the backward pass) before the lm head
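This change drops the operator-pattern heuristics (GELU→Linear chains, SiLU multiply sequences, and their fused variants) in favor of a name-based rule: under tensor parallelism at inference time, an allreduce follows a linear layer whose name contains attn.o_proj or mlp.down_proj, or an MLP block recognized by is_mlp_block. The #ifdef DEADCODE copy of need_to_add_parallel_identity is deleted outright; the live definition below it is untouched. A minimal Python sketch of the new predicate, where LayerInfo and OP_LINEAR are hypothetical stand-ins for FlexFlow's Layer and OperatorType (the is_mlp_block branch is left out of the sketch):

from dataclasses import dataclass

OP_LINEAR = "OP_LINEAR"  # hypothetical stand-in for FlexFlow's OperatorType enum

@dataclass
class LayerInfo:  # hypothetical stand-in for FlexFlow's Layer
    op_type: str
    name: str

def needs_allreduce(layer: LayerInfo, tp_degree: int) -> bool:
    # An allreduce is only needed under tensor parallelism, and only after
    # a row-parallel linear projection (attention output or MLP down-proj).
    if tp_degree <= 1:
        return False
    return layer.op_type == OP_LINEAR and (
        "attn.o_proj" in layer.name or "mlp.down_proj" in layer.name
    )

assert needs_allreduce(LayerInfo(OP_LINEAR, "layers.0.self_attn.o_proj"), tp_degree=4)

Matching on layer names rather than operator sequences makes the rule robust to how the surrounding activation functions happen to be fused.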
3 changes: 2 additions & 1 deletion tests/peft/peft_alignment_test.py
@@ -655,7 +655,8 @@ def compare(hf_tensor, ff_tensor, label="", additional_ff_tensor=None, tolerance
     ff_tensor_name = f"layers.{i}.layers.{i}.input_layernorm"
     _output_comparison = TensorComparisonIdxs(hf_tensor_type="input_gradient", ff_tensor_type="output_gradient", hf_tensor_idx=0, ff_tensor_idx=1)
     input_layernorm_out1 = get_ff_tensor(ff_tensor_name, _output_comparison, hf_tensor.shape, tp_type=TPType.REPLICATE)
-    torch.testing.assert_close(attn_input, input_layernorm_out1, rtol=1.3e-6, atol=1e-5)
+    compare_loaded_tensors(attn_input, input_layernorm_out1, tolerance=1e-5)
+    # torch.testing.assert_close(attn_input, input_layernorm_out1, rtol=1.3e-6, atol=1e-5)
 
     # Input layernorm
 
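The strict torch.testing.assert_close call (rtol=1.3e-6) is swapped for the script's own compare_loaded_tensors helper at a 1e-5 tolerance, presumably to absorb the small numeric drift that tensor-parallel partial sums introduce. A rough sketch of what such a tolerance check might look like (an assumption for illustration only; compare_loaded_tensors is defined elsewhere in the test and may differ):

import torch

def compare_with_tolerance(a: torch.Tensor, b: torch.Tensor, tolerance: float = 1e-5) -> None:
    # Hypothetical sketch: count element-wise mismatches against a single
    # absolute tolerance instead of failing on assert_close's strict
    # rtol/atol criterion.
    mismatch = (a - b).abs() > tolerance
    if mismatch.any():
        raise AssertionError(
            f"{int(mismatch.sum())} / {a.numel()} elements differ by more than {tolerance}"
        )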
6 changes: 3 additions & 3 deletions tests/peft_test.sh
@@ -45,8 +45,8 @@ echo "Python test"
 # C++ test
 echo "C++ test"
 ./build/inference/peft/peft \
-    -ll:gpu 1 -ll:cpu 4 -ll:util 4 \
-    -tensor-parallelism-degree 1 \
+    -ll:gpu 4 -ll:cpu 4 -ll:util 4 \
+    -tensor-parallelism-degree 4 \
     -ll:fsize 8192 -ll:zsize 12000 \
     -llm-model JackFram/llama-160m \
     -finetuning-dataset ./inference/prompt/peft_dataset.json \
@@ -55,7 +55,7 @@ echo "C++ test"
     --use-full-precision \
     --inference-debugging
 # Check alignment
-python ./tests/peft/peft_alignment_test.py -tp 1 -lr 1.0
+python ./tests/peft/peft_alignment_test.py -tp 4 -lr 1.0
 
 # Print success message
 echo ""
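The C++ test now runs on 4 GPUs with tensor-parallelism degree 4 instead of the previous single-GPU, degree-1 configuration, and the alignment script's -tp flag is bumped to match. The three values have to move together; a small guard one could place ahead of the launch (a hypothetical helper, not part of the script):

def check_tp_config(num_gpus: int, tp_degree: int) -> None:
    # Every tensor-parallel shard needs its own device, so the degree must
    # divide the GPU count and cannot exceed it.
    if tp_degree > num_gpus or num_gpus % tp_degree != 0:
        raise ValueError(f"tp degree {tp_degree} incompatible with {num_gpus} GPUs")

check_tp_config(num_gpus=4, tp_degree=4)  # the updated test configuration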
