Skip to content

Commit

Permalink
new decoder arch
Browse files Browse the repository at this point in the history
  • Loading branch information
goliaro committed Jan 14, 2024
1 parent 74163df commit 8c913a5
Showing 1 changed file with 20 additions and 3 deletions.
23 changes: 20 additions & 3 deletions python/flexflow/serve/models/falcon.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,9 @@ def build_model(self, max_tokens_per_batch):
axes,
True,
self.falcon_config.layer_norm_epsilon,
name=f"layers_{i}_input_layernorm",
name=f"layers_{i}_input_layernorm"
if not self.falcon_config.new_decoder_architecture
else f"layers_{i}_ln_attn",
)
else:
token, att_norm = ffmodel.residual_layer_norm(
Expand All @@ -136,7 +138,22 @@ def build_model(self, max_tokens_per_batch):
axes,
True,
self.falcon_config.layer_norm_epsilon,
name=f"layers_{i}_input_layernorm",
name=f"layers_{i}_input_layernorm"
if not self.falcon_config.new_decoder_architecture
else f"layers_{i}_ln_attn",
)

# MLP norm (identical to att norm for old architecture)
if not self.falcon_config.new_decoder_architecture:
mlp_norm = att_norm
else:
# Residual has already been computed by the attn norm (token = token + mha + mlp_output)
mlp_norm = ffmodel.layer_norm(
token,
axes,
True,
self.falcon_config.layer_norm_epsilon,
name=f"layers_{i}_ln_mlp",
)

if self.mode == InferenceMode.BEAM_SEARCH_MODE:
Expand Down Expand Up @@ -194,7 +211,7 @@ def build_model(self, max_tokens_per_batch):
assert False

dense_h_to_4h = ffmodel.dense(
att_norm,
mlp_norm,
self.falcon_config.hidden_size * 4,
ActiMode.AC_MODE_NONE,
False,
Expand Down

0 comments on commit 8c913a5

Please sign in to comment.