Bump 3rdparty/Megatron-LM from 2da43ef to 65720c8 #579

Merged
3rdparty/Megatron-LM: 2 changes (1 addition, 1 deletion)
Submodule Megatron-LM updated 441 files
sub-packages/bionemo-esm2/src/bionemo/esm2/model/attention.py: 365 changes (0 additions, 365 deletions)

This file was deleted.

sub-packages/bionemo-esm2/src/bionemo/esm2/model/model.py: 4 changes (1 addition, 3 deletions)
@@ -35,7 +35,6 @@
 from torch.optim import Optimizer

 from bionemo.esm2.data.tokenizer import BioNeMoESMTokenizer
-from bionemo.esm2.model.attention import ESM2DotProductAttention, ESM2TEDotProductAttention
 from bionemo.esm2.model.embedding import ESM2Embedding
 from bionemo.llm.api import MegatronLossType
 from bionemo.llm.model.biobert.model import BioBertConfig, MegatronBioBertModel, PositionEmbeddingKinds
@@ -294,6 +293,7 @@ class ESM2GenericConfig(BioBertConfig[ESM2ModelT, MegatronLossType]):
     bias_activation_fusion: bool = True  # True degrades accuracy slightly, but is faster.
     activation_func: Callable = F.gelu  # esm_gelu_func # ESM2 MLP
     init_method_std: float = 0.02
+    softmax_scale: float = 1.0

     # embedding
     token_dropout: bool = True
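
As an aside (not part of this PR): the sketch below illustrates the conventional role of a softmax scale in dot-product attention, assuming softmax_scale multiplies the query-key logits before the softmax. How ESM2GenericConfig actually consumes the new field is outside this diff, so treat the function and its semantics as assumptions.

import math

import torch


def scaled_dot_product_attention(q, k, v, softmax_scale=None):
    # Conventional scaled dot-product attention (illustrative only).
    # softmax_scale=None falls back to the usual 1/sqrt(d_k); an explicit
    # value (e.g. 1.0) is applied verbatim to the logits.
    d_k = q.shape[-1]
    scale = softmax_scale if softmax_scale is not None else 1.0 / math.sqrt(d_k)
    logits = torch.matmul(q, k.transpose(-2, -1)) * scale
    return torch.matmul(torch.softmax(logits, dim=-1), v)

Under this reading, the default of 1.0 added above would leave the logits unscaled; whether that matches the downstream attention implementation is not shown in this diff.
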
@@ -346,13 +346,11 @@ def __post_init__(self):
         super().__post_init__()
         if self.biobert_spec_option == BiobertSpecOption.esm2_bert_layer_with_transformer_engine_spec:
             self.apply_query_key_layer_scaling = False
-            self.core_attention_override = ESM2TEDotProductAttention
         elif self.biobert_spec_option == BiobertSpecOption.esm2_bert_layer_local_spec:
             logging.warning(
                 "BiobertSpecOption.esm2_bert_layer_local_spec is depreciated. Use BiobertSpecOption.esm2_bert_layer_with_transformer_engine_spec instead."
             )
             self.apply_query_key_layer_scaling = True
-            self.core_attention_override = ESM2DotProductAttention
         else:
             raise ValueError(f"Unknown biobert_spec_option: {self.biobert_spec_option}")

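Also as an aside (not code from this PR): a self-contained sketch of what the post-change __post_init__ branch amounts to. The stand-in enum and helper name below are hypothetical; the point is that only apply_query_key_layer_scaling is chosen per spec option, and no core_attention_override is installed any more.

import logging
from enum import Enum


class BiobertSpecOption(Enum):
    # Stand-in for bionemo's real BiobertSpecOption enum; values are placeholders.
    esm2_bert_layer_with_transformer_engine_spec = "te"
    esm2_bert_layer_local_spec = "local"


def resolve_apply_query_key_layer_scaling(spec: BiobertSpecOption) -> bool:
    # Mirrors the if/elif/else kept by this PR: only the scaling flag is chosen
    # per spec; no core attention class is installed any more.
    if spec is BiobertSpecOption.esm2_bert_layer_with_transformer_engine_spec:
        return False
    if spec is BiobertSpecOption.esm2_bert_layer_local_spec:
        logging.warning(
            "esm2_bert_layer_local_spec is deprecated; use "
            "esm2_bert_layer_with_transformer_engine_spec instead."
        )
        return True
    raise ValueError(f"Unknown biobert_spec_option: {spec}")


# Example: the Transformer Engine spec disables query/key layer scaling.
assert resolve_apply_query_key_layer_scaling(
    BiobertSpecOption.esm2_bert_layer_with_transformer_engine_spec
) is False

The deprecation warning text above is paraphrased; the original message appears verbatim in the hunk shown in the diff.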