-
Notifications
You must be signed in to change notification settings - Fork 4
/
param_format.txt
80 lines (80 loc) · 1.95 KB
/
param_format.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
embedding
    embedding (50265, 768)
encoder_embed_positions (1026, 768)
decoder_embed_positions (1026, 768)
encoder_embed_layer_norm
    scale (768,)
    bias (768,)
decoder_embed_layer_norm
    scale (768,)
    bias (768,)
encoder_layers
    0..5
        self_attn
            q_proj
                kernel (768, 12, 64)
                bias (12, 64)
            k_proj
                kernel (768, 12, 64)
                bias (12, 64)
            v_proj
                kernel (768, 12, 64)
                bias (12, 64)
            ff
                kernel (12, 64, 768)
                bias (768,)
        self_attn_layer_norm
            scale (768,)
            bias (768,)
        ff0
            kernel (768, 3072)
            bias (3072,)
        ff1
            kernel (3072, 768)
            bias (768,)
        final_layer_norm
            scale (768,)
            bias (768,)
decoder_layers
    0..5
        self_attn
            q_proj
                kernel (768, 12, 64)
                bias (12, 64)
            k_proj
                kernel (768, 12, 64)
                bias (12, 64)
            v_proj
                kernel (768, 12, 64)
                bias (12, 64)
            ff
                kernel (12, 64, 768)
                bias (768,)
        self_attn_layer_norm
            scale (768,)
            bias (768,)
        cross_attn
            q_proj
                kernel (768, 12, 64)
                bias (12, 64)
            k_proj
                kernel (768, 12, 64)
                bias (12, 64)
            v_proj
                kernel (768, 12, 64)
                bias (12, 64)
            ff
                kernel (12, 64, 768)
                bias (768,)
        cross_attn_layer_norm
            scale (768,)
            bias (768,)
        ff0
            kernel (768, 3072)
            bias (3072,)
        ff1
            kernel (3072, 768)
            bias (768,)
        final_layer_norm
            scale (768,)
            bias (768,)