Commit

Remove custom LayerNorm. See karpathy#454 (comment)
sopotc committed Mar 17, 2024
1 parent 67ac63c commit 2ba87b1
Showing 1 changed file with 3 additions and 14 deletions.
17 changes: 3 additions & 14 deletions model.py
@@ -15,17 +15,6 @@
 import torch.nn as nn
 from torch.nn import functional as F
 
-class LayerNorm(nn.Module):
-    """ LayerNorm but with an optional bias. PyTorch doesn't support simply bias=False """
-
-    def __init__(self, ndim, bias):
-        super().__init__()
-        self.weight = nn.Parameter(torch.ones(ndim))
-        self.bias = nn.Parameter(torch.zeros(ndim)) if bias else None
-
-    def forward(self, input):
-        return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
-
 class CausalSelfAttention(nn.Module):
 
     def __init__(self, config):
@@ -84,9 +73,9 @@ class Block(nn.Module):
 
     def __init__(self, config):
         super().__init__()
-        self.ln_1 = LayerNorm(config.n_embd, bias=config.bias)
+        self.ln_1 = nn.LayerNorm(config.n_embd, bias=config.bias)
         self.attn = CausalSelfAttention(config)
-        self.ln_2 = LayerNorm(config.n_embd, bias=config.bias)
+        self.ln_2 = nn.LayerNorm(config.n_embd, bias=config.bias)
         self.mlp = MLP(config)
 
 
@@ -118,7 +107,7 @@ def __init__(self, config):
             wpe = nn.Embedding(config.block_size, config.n_embd),
            drop = nn.Dropout(config.dropout),
             h = nn.ModuleList([Block(config) for _ in range(config.n_layer)]),
-            ln_f = LayerNorm(config.n_embd, bias=config.bias),
+            ln_f = nn.LayerNorm(config.n_embd, bias=config.bias),
         ))
         self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
         # with weight tying when using torch.compile() some warnings get generated:
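Note: the custom class existed only because older PyTorch releases had no bias argument on nn.LayerNorm; in recent versions (2.1+, as the linked issue comment suggests) the built-in module can drop the bias directly. The minimal sketch below, assuming PyTorch >= 2.1, checks that nn.LayerNorm(..., bias=False) matches the F.layer_norm call used by the removed class; 768 stands in for config.n_embd and is only an illustrative value.

# Minimal sketch (assumes PyTorch >= 2.1, where nn.LayerNorm accepts bias=False;
# 768 is an illustrative stand-in for config.n_embd)
import torch
import torch.nn as nn
from torch.nn import functional as F

ln = nn.LayerNorm(768, bias=False)   # built-in module, no learnable bias
print(ln.bias)                       # None, same as the removed custom class
x = torch.randn(2, 64, 768)
# the removed class computed F.layer_norm(input, weight.shape, weight, bias, 1e-5)
y = F.layer_norm(x, ln.weight.shape, ln.weight, ln.bias, 1e-5)
assert torch.allclose(ln(x), y)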
