-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathflask-kobert.py
123 lines (93 loc) · 3.8 KB
/
flask-kobert.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
from flask import Flask, request
# Flask application object; the /predict route further down registers itself
# on it through the @app.route decorator.
app = Flask(__name__)
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import gluonnlp as nlp
import numpy as np
import pandas as pd
#KoBERT
from kobert.utils import get_tokenizer
from kobert.pytorch_kobert import get_pytorch_kobert_model
#transformer
from transformers import AdamW
from transformers.optimization import get_cosine_schedule_with_warmup
from kobertm import BERTDataset
from kobertm import BERTClassifier
## Setting parameters
# max_len and batch_size are read by the /predict handler below when it builds
# its BERTDataset and DataLoader.
max_len = 64
batch_size = 64
# NOTE(review): the next five values look like training hyperparameters;
# nothing in the visible part of this file reads them — confirm before removing.
warmup_ratio = 0.1
num_epochs = 5
max_grad_norm = 1
log_interval = 200
learning_rate = 5e-5
# Vocabulary of the BERT model
# Everything below runs once at import time: the pretrained KoBERT model and
# its vocabulary are fetched, wrapped in the classifier head, and placed on CPU.
device = torch.device("cpu")
bertmodel, vocab = get_pytorch_kobert_model()
model = BERTClassifier(bertmodel).to(device)
# 'model.pt' is resolved relative to the process working directory;
# map_location='cpu' remaps the checkpoint's tensors onto the CPU while loading.
model.load_state_dict(torch.load('model.pt', map_location='cpu'))
def _softmax_percent(scores):
    """Return the softmax of *scores* as percentages rounded to 3 decimals.

    The maximum score is subtracted before exponentiating so that large
    scores cannot overflow ``np.exp``; the result is mathematically unchanged.
    """
    scores = np.asarray(scores, dtype=np.float64)
    exp_shifted = np.exp(scores - np.max(scores))
    return np.round(exp_shifted / np.sum(exp_shifted) * 100, 3)


def _top_two(percentages):
    """Return the indices of the two largest entries, largest first.

    A stable sort on the negated values keeps the lowest index first among
    ties, matching ``np.argmax``. Fewer than two indices are returned only
    when *percentages* itself has fewer than two entries.
    """
    return np.argsort(-np.asarray(percentages), kind="stable")[:2]


@app.route("/predict/<arg>")
def predict(arg):
    """Classify the sentence given in the URL path and report its two likeliest labels.

    Args:
        arg: The sentence to classify, taken from the ``/predict/<arg>`` path.

    Returns:
        The literal string ``'null'`` when the best label's softmax
        percentage is at most 60.0. Otherwise a dict with two parallel
        lists, best label first:

        * ``'result_emotion'`` -- class indices as strings (``'0'``, ``'1'``, ...).
        * ``'percent'`` -- the matching softmax percentages (0-100, 3 decimals).

        NOTE(review): a commented-out mapping in the original source suggests
        the labels mean 0=joy, 1=embarrassment, 2=anger, 3=anxiety, 4=hurt,
        5=sadness — confirm against the training data.
    """
    # Tokenization.
    # NOTE(review): the tokenizer is rebuilt on every request; it looks
    # request-independent and could probably be created once at import time.
    tokenizer = get_tokenizer()
    tok = nlp.data.BERTSPTokenizer(tokenizer, vocab, lower=False)

    # One-row dataset of [sentence, label]. '0' is presumably a placeholder
    # label and 0 / 1 the sentence / label column indices — TODO confirm
    # against BERTDataset.
    dataset = BERTDataset([[arg, '0']], 0, 1, tok, max_len, True, False)
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, num_workers=0)

    model.eval()
    # Gradients are never needed when serving, so skip autograd bookkeeping.
    with torch.no_grad():
        # The dataset holds exactly one row, so there is exactly one batch.
        token_ids, valid_length, segment_ids, _label = next(iter(loader))
        out = model(
            token_ids.long().to(device),
            valid_length,
            segment_ids.long().to(device),
        )

    # Assumes the model returns one row of class scores per input sentence;
    # only a single sentence was submitted, so row 0 is the answer.
    percentages = _softmax_percent(out[0].detach().cpu().numpy())

    # Rank by sorting instead of zeroing the winner and re-running argmax:
    # when every other rounded percentage is already 0.0 and the winner is
    # class 0, the zero-and-argmax approach reports the winner twice.
    top = _top_two(percentages)
    result_emotion = [str(int(index)) for index in top]
    # Plain floats keep NumPy scalar types out of the JSON response.
    percent = [float(percentages[index]) for index in top]
    app.logger.debug("prediction labels=%s percent=%s", result_emotion, percent)

    # Low-confidence predictions are reported as the string 'null'.
    if percent[0] <= 60.0:
        return 'null'
    return {'result_emotion': result_emotion, 'percent': percent}