BD_train.py
import os
import argparse
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils import data
from BD_model import Net
from BD_data_load import TrainDataset, Trainpad, TestDataset, Testpad, all_triggers, all_arguments
from BD_eval import eval
from BD_test import test
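
# Trains Net (from BD_model) to jointly tag event triggers and their
# arguments: each epoch is scored on the dev set, and training stops early
# once the combined trigger + argument F1 stops improving.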
def train(model, iterator, optimizer, criterion):
    model.train()
    for i, batch in enumerate(iterator):
        # id_ avoids shadowing the builtin id(); the field is unused here.
        tokens_x_2d, id_, triggers_y_2d, arguments_2d, seqlens_1d, head_indexes_2d, mask, words_2d, triggers_2d = batch
        optimizer.zero_grad()
        trigger_logits, trigger_hat_2d, argument_logits, arguments_y_1d, argument_hat_2d = model.predict_triggers(
            tokens_x_2d=tokens_x_2d,
            mask=mask,
            head_indexes_2d=head_indexes_2d,
            arguments_2d=arguments_2d)

        triggers_y_2d = torch.LongTensor(triggers_y_2d).to(model.device)
        triggers_y_2d = triggers_y_2d.view(-1)
        trigger_logits = trigger_logits.view(-1, trigger_logits.shape[-1])
        trigger_loss = criterion(trigger_logits, triggers_y_2d)

        if len(argument_logits) != 1:
            argument_logits = argument_logits.view(-1, argument_logits.shape[-1])
            argument_loss = criterion(argument_logits, arguments_y_1d.view(-1))
            # Argument loss is weighted twice as heavily as trigger loss.
            loss = trigger_loss + 2 * argument_loss
        else:
            loss = trigger_loss

        # backward() must run before clipping so that gradients exist to clip.
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        if i % 40 == 0:  # monitoring
            print("step: {}, loss: {}".format(i, loss.item()))
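
# Note: the `len(argument_logits) != 1` guard above assumes predict_triggers
# returns a length-1 placeholder when a batch yields no argument candidates,
# so the argument loss is only computed when real argument logits exist.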
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--batch_size", type=int, default=12)
    parser.add_argument("--lr", type=float, default=2e-5)
    parser.add_argument("--n_epochs", type=int, default=100)
    parser.add_argument("--logdir", type=str, default="output")
    parser.add_argument("--trainset", type=str, default="./data/train.json")
    parser.add_argument("--devset", type=str, default="./data/dev.json")
    parser.add_argument("--testset", type=str, default="./data/test1.json")
    hp = parser.parse_args()

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = Net(
        device=device,
        trigger_size=len(all_triggers),
        argument_size=len(all_arguments)
    )
    if device == 'cuda':
        model = model.cuda()
    train_dataset = TrainDataset(hp.trainset)
    dev_dataset = TrainDataset(hp.devset)
    test_dataset = TestDataset(hp.testset)

    train_iter = data.DataLoader(dataset=train_dataset,
                                 batch_size=hp.batch_size,
                                 shuffle=True,
                                 num_workers=4,
                                 collate_fn=Trainpad)
    dev_iter = data.DataLoader(dataset=dev_dataset,
                               batch_size=hp.batch_size,
                               shuffle=False,
                               num_workers=4,
                               collate_fn=Trainpad)
    test_iter = data.DataLoader(dataset=test_dataset,
                                batch_size=hp.batch_size,
                                shuffle=False,
                                num_workers=4,
                                collate_fn=Testpad)
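    # Only the training loader shuffles; dev/test keep a fixed order so that
    # per-epoch scores are comparable. Trainpad/Testpad are the collate
    # functions from BD_data_load, which presumably pad each batch to its
    # longest sequence.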
    optimizer = optim.Adam(model.parameters(), lr=hp.lr)
    # ignore_index=0 assumes index 0 is the padding label in BD_data_load,
    # so padded positions contribute nothing to the loss.
    criterion = nn.CrossEntropyLoss(ignore_index=0)

    if not os.path.exists(hp.logdir):
        os.makedirs(hp.logdir)

    early_stop = 15  # patience: epochs without dev improvement before stopping
    stop = 0
    best_scores = 0.0
    for epoch in range(1, hp.n_epochs + 1):
        stop += 1
        print("=========train at epoch={}=========".format(epoch))
        train(model, train_iter, optimizer, criterion)

        fname = os.path.join(hp.logdir, str(epoch))
        print("=========dev at epoch={}=========".format(epoch))
        trigger_f1, argument_f1 = eval(model, dev_iter, fname + '_dev')
        print("=========test at epoch={}=========".format(epoch))
        test(model, test_iter, fname + '_test')

        # Check for improvement first so an epoch that sets a new best is
        # never cut off by the patience counter in the same iteration.
        if trigger_f1 + argument_f1 > best_scores:
            best_scores = trigger_f1 + argument_f1
            stop = 0
            print("The new best in epoch={}".format(epoch))
            # torch.save(model, "best_model.pt")
        if stop >= early_stop:
            print("The best result in epoch={}".format(epoch - early_stop))
            break
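
# Example invocation (the paths below are the argparse defaults; point them
# at wherever the train/dev/test JSON files actually live):
#
#   python BD_train.py --batch_size 12 --lr 2e-5 --n_epochs 100 \
#       --trainset ./data/train.json --devset ./data/dev.json --testset ./data/test1.json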