
Commit

fix
goliaro committed Nov 8, 2024
1 parent b56ebd3 commit 3632754
Showing 3 changed files with 10 additions and 56 deletions.
57 changes: 5 additions & 52 deletions src/runtime/model.cc
@@ -3420,63 +3420,16 @@ bool FFModel::need_to_add_combine(int layer_idx) const {
 
 bool FFModel::need_to_add_allreduce(int layer_idx) const {
   auto const &l = layers[layer_idx];
-  if (config.computationMode == COMP_MODE_INFERENCE &&
-      config.tensor_parallelism_degree > 1 &&
-      (
-          // l->op_type == OP_INC_MULTIHEAD_SELF_ATTENTION ||
-          // l->op_type == OP_TREE_INC_MULTIHEAD_SELF_ATTENTION ||
-          (std::string(l->name).find("attn.o_proj") != std::string::npos) ||
-          // mlp layer
-          is_mlp_block(layer_idx) ||
-          // llama mlp layer
-          (l->op_type == OP_LINEAR && layer_idx >= 2 &&
-           layers[layer_idx - 1]->op_type == OP_GELU &&
-           layers[layer_idx - 2]->op_type == OP_LINEAR) ||
-          // LLAMA without element-wise operator fusion
-          (l->op_type == OP_LINEAR && layer_idx >= 5 &&
-           layers[layer_idx - 1]->op_type == OP_EW_MUL &&
-           layers[layer_idx - 2]->op_type == OP_EW_MUL &&
-           layers[layer_idx - 3]->op_type == OP_SIGMOID &&
-           layers[layer_idx - 4]->op_type == OP_LINEAR &&
-           layers[layer_idx - 5]->op_type == OP_LINEAR) ||
-          // LLAMA with element-wise operator fusion
-          (l->op_type == OP_LINEAR && layer_idx >= 3 &&
-           layers[layer_idx - 1]->op_type == OP_SIGMOID_SILU_MULTI &&
-           layers[layer_idx - 2]->op_type == OP_LINEAR &&
-           layers[layer_idx - 3]->op_type == OP_LINEAR))) {
+  if (config.computationMode == COMP_MODE_INFERENCE && config.tensor_parallelism_degree > 1 &&
+      ((l->op_type == OP_LINEAR && std::string(l->name).find("attn.o_proj") != std::string::npos) ||
+       is_mlp_block(layer_idx) ||
+       (l->op_type == OP_LINEAR && std::string(l->name).find("mlp.down_proj") != std::string::npos)
+      )) {
     return true;
   }
   return false;
 }
 
-#ifdef DEADCODE
-bool FFModel::need_to_add_parallel_identity(int layer_idx) const {
-  auto const &l = layers[layer_idx];
-  // add parallel identity (allreduce in the backward pass) before the lm head
-  // we find the lm head by looking for the linear layer right after a residual
-  // rms norm / layer norm, and before a softmax, followed by
-  // argmax/argtopk/sampling
-  if (config.computationMode == COMP_MODE_INFERENCE &&
-      config.tensor_parallelism_degree > 1 &&
-      ((l->op_type == OP_RESIDUAL_RMS_NORM ||
-        l->op_type == OP_RESIDUAL_LAYERNORM) &&
-       // there are at least 2 layers before the norm, and at least 3 following
-       // the norm
-       layer_idx >= 2 && layer_idx < layers.size() - 3 &&
-       // norm is followed by linear layer (lm head)
-       layers[layer_idx + 1]->op_type == OP_LINEAR &&
-       // lm head is followed by softmax
-       layers[layer_idx + 2]->op_type == OP_SOFTMAX &&
-       // softmax is followed by argmax/argtopk/sampling
-       (layers[layer_idx + 3]->op_type == OP_ARG_TOPK ||
-        layers[layer_idx + 3]->op_type == OP_SAMPLING ||
-        layers[layer_idx + 3]->op_type == OP_ARGMAX ||
-        layers[layer_idx + 3]->op_type == OP_SCALAR_TRUE_DIV))) {
-    return true;
-  }
-  return false;
-}
-#endif
 bool FFModel::need_to_add_parallel_identity(int layer_idx) const {
   auto const &l = layers[layer_idx];
   // add parallel identity (allreduce in the backward pass) before the lm head
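This change drops the operator-pattern heuristics (GELU→Linear chains, SiLU multiply sequences, and their fused variants) in favor of a name-based rule: under tensor parallelism at inference time, an allreduce follows a linear layer whose name contains attn.o_proj or mlp.down_proj, or an MLP block recognized by is_mlp_block. The #ifdef DEADCODE copy of need_to_add_parallel_identity is deleted outright; the live definition below it is untouched. A minimal Python sketch of the new predicate, where LayerInfo and OP_LINEAR are hypothetical stand-ins for FlexFlow's Layer and OperatorType (the is_mlp_block branch is left out of the sketch):

from dataclasses import dataclass

OP_LINEAR = "OP_LINEAR"  # hypothetical stand-in for FlexFlow's OperatorType enum

@dataclass
class LayerInfo:  # hypothetical stand-in for FlexFlow's Layer
    op_type: str
    name: str

def needs_allreduce(layer: LayerInfo, tp_degree: int) -> bool:
    # An allreduce is only needed under tensor parallelism, and only after
    # a row-parallel linear projection (attention output or MLP down-proj).
    if tp_degree <= 1:
        return False
    return layer.op_type == OP_LINEAR and (
        "attn.o_proj" in layer.name or "mlp.down_proj" in layer.name
    )

assert needs_allreduce(LayerInfo(OP_LINEAR, "layers.0.self_attn.o_proj"), tp_degree=4)

Matching on layer names rather than operator sequences makes the rule robust to how the surrounding activation functions happen to be fused.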
3 changes: 2 additions & 1 deletion tests/peft/peft_alignment_test.py
@@ -655,7 +655,8 @@ def compare(hf_tensor, ff_tensor, label="", additional_ff_tensor=None, tolerance
     ff_tensor_name = f"layers.{i}.layers.{i}.input_layernorm"
     _output_comparison = TensorComparisonIdxs(hf_tensor_type="input_gradient", ff_tensor_type="output_gradient", hf_tensor_idx=0, ff_tensor_idx=1)
     input_layernorm_out1 = get_ff_tensor(ff_tensor_name, _output_comparison, hf_tensor.shape, tp_type=TPType.REPLICATE)
-    torch.testing.assert_close(attn_input, input_layernorm_out1, rtol=1.3e-6, atol=1e-5)
+    compare_loaded_tensors(attn_input, input_layernorm_out1, tolerance=1e-5)
+    # torch.testing.assert_close(attn_input, input_layernorm_out1, rtol=1.3e-6, atol=1e-5)
 
     # Input layernorm
 
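The strict torch.testing.assert_close call (rtol=1.3e-6) is swapped for the script's own compare_loaded_tensors helper at a 1e-5 tolerance, presumably to absorb the small numeric drift that tensor-parallel partial sums introduce. A rough sketch of what such a tolerance check might look like (an assumption for illustration only; compare_loaded_tensors is defined elsewhere in the test and may differ):

import torch

def compare_with_tolerance(a: torch.Tensor, b: torch.Tensor, tolerance: float = 1e-5) -> None:
    # Hypothetical sketch: count element-wise mismatches against a single
    # absolute tolerance instead of failing on assert_close's strict
    # rtol/atol criterion.
    mismatch = (a - b).abs() > tolerance
    if mismatch.any():
        raise AssertionError(
            f"{int(mismatch.sum())} / {a.numel()} elements differ by more than {tolerance}"
        )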
6 changes: 3 additions & 3 deletions tests/peft_test.sh
@@ -45,8 +45,8 @@ echo "Python test"
 # C++ test
 echo "C++ test"
 ./build/inference/peft/peft \
-    -ll:gpu 1 -ll:cpu 4 -ll:util 4 \
-    -tensor-parallelism-degree 1 \
+    -ll:gpu 4 -ll:cpu 4 -ll:util 4 \
+    -tensor-parallelism-degree 4 \
     -ll:fsize 8192 -ll:zsize 12000 \
     -llm-model JackFram/llama-160m \
     -finetuning-dataset ./inference/prompt/peft_dataset.json \
@@ -55,7 +55,7 @@ echo "C++ test"
     --use-full-precision \
     --inference-debugging
 # Check alignment
-python ./tests/peft/peft_alignment_test.py -tp 1 -lr 1.0
+python ./tests/peft/peft_alignment_test.py -tp 4 -lr 1.0
 
 # Print success message
 echo ""
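The C++ test now runs on 4 GPUs with tensor-parallelism degree 4 instead of the previous single-GPU, degree-1 configuration, and the alignment script's -tp flag is bumped to match. The three values have to move together; a small guard one could place ahead of the launch (a hypothetical helper, not part of the script):

def check_tp_config(num_gpus: int, tp_degree: int) -> None:
    # Every tensor-parallel shard needs its own device, so the degree must
    # divide the GPU count and cannot exceed it.
    if tp_degree > num_gpus or num_gpus % tp_degree != 0:
        raise ValueError(f"tp degree {tp_degree} incompatible with {num_gpus} GPUs")

check_tp_config(num_gpus=4, tp_degree=4)  # the updated test configuration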
