forked from interactive-cookbook/tagger-parser
-
Notifications
You must be signed in to change notification settings - Fork 0
/
elmo_eng.jsonnet
142 lines (133 loc) · 3.43 KB
/
elmo_eng.jsonnet
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
// Hyperparameters and paths for this experiment. Each `local` below is read by
// name from the configuration object that follows these bindings.

// GPU: forwarded to `trainer.cuda_device`.
local cuda_device = 1;

// ELMo: files handed to the `elmo_token_embedder`, plus its dropout.
local options_file = '/proj/cookbook.shadow/elmo_english/model.text_field_embedder.elmo.options_file';
local weight_file = '/proj/cookbook.shadow/elmo_english/model.text_field_embedder.elmo.weight_file';
local elmo_dropout = 0.0;
// Only used to compute `lstm_input_size`; it is not passed to the embedder.
// NOTE(review): must agree with the model described by `options_file` — confirm.
local elmo_embedding_dim = 1024;

// CRF: used as the `dropout` of the `crf_tagger` model.
local crf_dropout = 0.5;

// character encoding CNN
// NOTE(review): presumably needs to be >= the largest entry of `cnn_windows`
// so that short tokens are still wide enough to convolve — confirm.
local min_padding_length = 3;
local char_embedding_dim = 16;
local cnn_num_filters = 128;
local cnn_windows = [3];
// The CNN encoder below is configured without an explicit output size, so its
// output width is `cnn_num_filters` per n-gram window.
local cnn_output_dim = cnn_num_filters * std.length(cnn_windows);

// LSTM
// The encoder consumes the ELMo vector and the character-CNN vector together.
// Deriving the CNN share from `cnn_windows` keeps this correct when more
// window sizes are added (with the single window above it is 1024 + 128).
local lstm_input_size = elmo_embedding_dim + cnn_output_dim;
local lstm_bidirectional = true;
local lstm_num_layers = 2;
local lstm_hidden_size = 50;
local lstm_dropout = 0.5;

// trainer
local optimizer = 'adam';
local lr = 0.0075;
local num_epochs = 100;
local grad_norm = 10.0;
local patience = 10;

// batch size: used by the bucket batch sampler.
local batch_size = 30;

// Gradient accumulation: forwarded to `trainer.num_gradient_accumulation_steps`.
local num_gradient_accumulation_steps = 1;

// Gradient checkpointing
// NOTE(review): this value is not referenced by the configuration object in
// this file, so changing it currently has no effect.
local gradient_checkpointing = false;

// Automatic mixed precision (AMP): forwarded to `trainer.use_amp`.
local use_amp = false;

// data paths
local train_data_path = 'data/English/Tagger/train.conll03';
local validation_data_path = 'data/English/Tagger/dev.conll03';
local test_data_path = 'data/English/Tagger/test.conll03';

// change to false to disable sanity checks
// (forwarded to `trainer.run_confidence_checks`)
local sanity_check = true;
{
  // Experiment description built from the locals declared earlier in this
  // file. The key layout follows AllenNLP's training-config conventions
  // (NOTE(review): presumably consumed by `allennlp train` — confirm).

  // ---- data -------------------------------------------------------------
  // Reads CoNLL-2003 style files and uses the `ner` column as the tag,
  // re-encoded as BIOUL.
  dataset_reader: {
    type: 'conll2003',
    tag_label: 'ner',
    coding_scheme: 'BIOUL',
    // Two views of every token: ELMo character ids and plain characters.
    // The indexer names must match the `token_embedders` keys in the model.
    token_indexers: {
      tokens: { type: 'elmo_characters' },
      token_characters: {
        type: 'characters',
        min_padding_length: min_padding_length,
      },
    },
  },
  datasets_for_vocab_creation: ['train'],
  train_data_path: train_data_path,
  validation_data_path: validation_data_path,
  test_data_path: test_data_path,
  evaluate_on_test: true,

  // ---- model ------------------------------------------------------------
  model: {
    type: 'crf_tagger',
    label_encoding: 'BIOUL',
    dropout: crf_dropout,

    text_field_embedder: {
      token_embedders: {
        // ELMo representation loaded from the configured option/weight files.
        tokens: {
          type: 'elmo_token_embedder',
          options_file: options_file,
          weight_file: weight_file,
          do_layer_norm: false,
          dropout: elmo_dropout,
        },
        // Character embeddings pooled by a CNN into one vector per token.
        token_characters: {
          type: 'character_encoding',
          embedding: {
            embedding_dim: char_embedding_dim,
            vocab_namespace: 'token_characters',
          },
          encoder: {
            type: 'cnn',
            embedding_dim: char_embedding_dim,
            num_filters: cnn_num_filters,
            ngram_filter_sizes: cnn_windows,
            conv_layer_activation: 'relu',
          },
        },
      },
    },

    // Sequence encoder sitting between the embedders and the CRF layer.
    encoder: {
      type: 'lstm',
      input_size: lstm_input_size,
      hidden_size: lstm_hidden_size,
      bidirectional: lstm_bidirectional,
      num_layers: lstm_num_layers,
      dropout: lstm_dropout,
    },

    // L2 penalty on parameters whose name matches 'scalar_parameters'
    // (NOTE(review): presumably ELMo's scalar-mix weights — confirm).
    regularizer: {
      regexes: [
        ['scalar_parameters', { type: 'l2', alpha: 0.5 }],
      ],
    },
  },

  // ---- batching ---------------------------------------------------------
  data_loader: {
    batch_sampler: {
      type: 'bucket',
      batch_size: batch_size,
    },
  },

  // ---- training loop ----------------------------------------------------
  trainer: {
    optimizer: { type: optimizer, lr: lr },
    // Keep only the single most recent checkpoint on disk.
    checkpointer: { keep_most_recent_by_count: 1 },
    // NOTE(review): model selection tracks accuracy ('+' presumably means
    // "higher is better"); confirm this is preferred over a span-level F1.
    validation_metric: '+accuracy',
    num_epochs: num_epochs,
    grad_norm: grad_norm,
    num_gradient_accumulation_steps: num_gradient_accumulation_steps,
    use_amp: use_amp,
    patience: patience,
    cuda_device: cuda_device,
    run_confidence_checks: sanity_check,
  },
}