From 6249264247a0d7a4e6e833ea0482525bcbd5418f Mon Sep 17 00:00:00 2001
From: elie <97572401+eliebak@users.noreply.github.com>
Date: Fri, 13 Sep 2024 18:22:14 +0200
Subject: [PATCH] forget comma

---
 src/nanotron/models/llama.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/nanotron/models/llama.py b/src/nanotron/models/llama.py
index 0bf80427..80522d62 100644
--- a/src/nanotron/models/llama.py
+++ b/src/nanotron/models/llama.py
@@ -919,7 +919,7 @@ def get_block_compute_costs(self):
             # CausalSelfAttention (qkv proj + attn out) + MLP
             LlamaDecoderLayer: 2 * model_config.num_attention_heads * d_qkv * model_config.hidden_size  # Q output projection
             + 2 * model_config.num_key_value_heads * d_qkv * model_config.hidden_size  # KV
-            + 3 * d_ff * model_config.hidden_size  # for the MLP (3 because of the gated mechanism)
+            + 3 * d_ff * model_config.hidden_size,  # for the MLP (3 because of the gated mechanism)
             # This is the last lm_head
             TensorParallelColumnLinear: model_config.vocab_size * model_config.hidden_size,
         }
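
Note on the fix: in get_block_compute_costs the LlamaDecoderLayer cost is one expression continued over several lines inside a dict literal, so without the trailing comma the next key (TensorParallelColumnLinear) runs straight into that expression and the file does not parse. The sketch below is only an illustration of the corrected shape of the dict, not the nanotron source: DummyConfig, its values, and the string keys are placeholders standing in for the real model config and the LlamaDecoderLayer / TensorParallelColumnLinear classes.

from dataclasses import dataclass


@dataclass
class DummyConfig:
    # Placeholder, roughly 7B-sized values; not taken from the patch.
    hidden_size: int = 4096
    intermediate_size: int = 11008
    num_attention_heads: int = 32
    num_key_value_heads: int = 8
    vocab_size: int = 32000


def get_block_compute_costs(model_config: DummyConfig) -> dict:
    d_ff = model_config.intermediate_size
    d_qkv = model_config.hidden_size // model_config.num_attention_heads
    return {
        # Decoder layer: Q + attention-output projections, K/V projections, gated MLP
        "LlamaDecoderLayer": 2 * model_config.num_attention_heads * d_qkv * model_config.hidden_size
        + 2 * model_config.num_key_value_heads * d_qkv * model_config.hidden_size
        + 3 * d_ff * model_config.hidden_size,  # the comma added by this patch closes the entry
        # Final lm_head projection
        "TensorParallelColumnLinear": model_config.vocab_size * model_config.hidden_size,
    }


print(get_block_compute_costs(DummyConfig()))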