Improve MPT fp8 (#1256)
Signed-off-by: dmsuehir <dina.s.jones@intel.com>
Co-authored-by: Thanaji Rao Thakkalapelli <tthakkalapelli@habana.ai>
Co-authored-by: regisss <15324346+regisss@users.noreply.github.com>
Co-authored-by: Libin Tang <litang@habana.ai>
Co-authored-by: Yeonsil Yoon <yyoon@habana.ai>
Co-authored-by: Dina Suehiro Jones <dina.s.jones@intel.com>
Co-authored-by: Sayantan Sarkar <supersarkar@gmail.com>
Co-authored-by: Iman Gohari <s.m.iman.gohari@intel.com>
Co-authored-by: Daniel Huang <daniel1.huang@intel.com>
Co-authored-by: Pramod Kumar <144990617+pramodkumar-habanalabs@users.noreply.github.com>
10 people committed Sep 23, 2024
1 parent fc2e671 commit b75216c
Showing 4 changed files with 189 additions and 166 deletions.
8 changes: 4 additions & 4 deletions optimum/habana/transformers/modeling_utils.py
@@ -76,6 +76,8 @@
     GaudiMixtralDecoderLayer,
     GaudiMixtralForCausalLM,
     GaudiMixtralModel,
+    GaudiMptAttention,
+    GaudiMptBlock,
     GaudiMptForCausalLM,
     GaudiMptModel,
     GaudiOPTForCausalLM,
@@ -152,8 +154,6 @@
     gaudi_mistral_rmsnorm_forward,
     gaudi_mixtral_block_sparse_moe_forward,
     gaudi_mixtral_rmsnorm_forward,
-    gaudi_mpt_attention_forward,
-    gaudi_mpt_block_forward,
     gaudi_opt_attention_forward,
     gaudi_opt_decoder_forward,
     gaudi_opt_decoder_layer_forward,
@@ -420,8 +420,8 @@ def adapt_transformers_to_gaudi():
     # Optimization for mpt on Gaudi
     transformers.models.mpt.modeling_mpt.MptForCausalLM = GaudiMptForCausalLM
     transformers.models.mpt.modeling_mpt.MptModel = GaudiMptModel
-    transformers.models.mpt.modeling_mpt.MptAttention.forward = gaudi_mpt_attention_forward
-    transformers.models.mpt.modeling_mpt.MptBlock.forward = gaudi_mpt_block_forward
+    transformers.models.mpt.modeling_mpt.MptAttention = GaudiMptAttention
+    transformers.models.mpt.modeling_mpt.MptBlock = GaudiMptBlock
 
     # Optimization for mistral on Gaudi
     transformers.models.mistral.modeling_mistral.MistralForCausalLM = GaudiMistralForCausalLM
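
The substantive change in this file: rather than monkey-patching MptAttention.forward and MptBlock.forward, the classes are now replaced outright. A .forward patch reuses the upstream __init__, so the Gaudi code cannot register new submodules; substituting the whole class lets the Gaudi variant set up extra state at construction time, for example hook points that fp8 quantization tooling can later swap for quantized kernels. A minimal sketch of the pattern, where GaudiMptAttentionSketch and matmul_qk are illustrative names rather than code from this commit:

import torch
import transformers.models.mpt.modeling_mpt as modeling_mpt

class GaudiMptAttentionSketch(modeling_mpt.MptAttention):
    # An __init__ override can register extra submodules, something a plain
    # .forward monkey-patch can never do.
    def __init__(self, config, *args, **kwargs):
        super().__init__(config, *args, **kwargs)
        self.matmul_qk = torch.nn.Identity()  # hypothetical fp8 hook point

# Class-level substitution, mirroring what adapt_transformers_to_gaudi() does:
modeling_mpt.MptAttention = GaudiMptAttentionSketch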
4 changes: 2 additions & 2 deletions optimum/habana/transformers/models/__init__.py
@@ -138,10 +138,10 @@
     gaudi_invert_attention_mask,
 )
 from .mpt import (
+    GaudiMptAttention,
+    GaudiMptBlock,
     GaudiMptForCausalLM,
     GaudiMptModel,
-    gaudi_mpt_attention_forward,
-    gaudi_mpt_block_forward,
 )
 from .opt import (
     GaudiOPTForCausalLM,
4 changes: 2 additions & 2 deletions optimum/habana/transformers/models/mpt/__init__.py
@@ -1,6 +1,6 @@
 from .modeling_mpt import (
+    GaudiMptAttention,
+    GaudiMptBlock,
     GaudiMptForCausalLM,
     GaudiMptModel,
-    gaudi_mpt_attention_forward,
-    gaudi_mpt_block_forward,
 )
(The diff for the fourth changed file, presumably optimum/habana/transformers/models/mpt/modeling_mpt.py where GaudiMptAttention and GaudiMptBlock are defined, did not load and is not shown.)
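
The entry point for users is unchanged by this commit: calling adapt_transformers_to_gaudi() before building a model swaps the Gaudi classes in, as the first hunk above shows. A short usage sketch, assuming optimum-habana is installed; the checkpoint name is illustrative:

from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi

# Must run before the model is instantiated so that transformers builds
# GaudiMptAttention / GaudiMptBlock instead of the stock classes.
adapt_transformers_to_gaudi()

from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("mosaicml/mpt-7b")
print(type(model.transformer.blocks[0]).__name__)  # expected: GaudiMptBlock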
