import json
import pickle
import random

import nltk
import numpy as np
import tensorflow as tf
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD

# Download the NLTK data the tokenizer and lemmatizer need
# (no-ops if already downloaded).
nltk.download("punkt", quiet=True)
nltk.download("wordnet", quiet=True)

lemmatizer = WordNetLemmatizer()
print("Using GPU:", tf.config.list_physical_devices("GPU"))
with open("intents.json") as file:
    intents = json.load(file)
print(intents)
words = []
classes = []
documents = []
# Punctuation and whitespace tokens to strip from the vocabulary.
ignore_letters = set("?,.!;()[]{}@#$%^&*+=_~`|\\<>/\n\t\r")
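# Tokenize every example pattern, growing the vocabulary (words), the label
# set (classes), and a list of (token_list, tag) training documents.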
for intent in intents["intents"]:
    for pattern in intent["patterns"]:
        word_list = nltk.word_tokenize(pattern)
        words.extend(word_list)
        documents.append((word_list, intent["tag"]))
    if intent["tag"] not in classes:
        classes.append(intent["tag"])
words = [lemmatizer.lemmatize(word.lower()) for word in words if word not in ignore_letters]
words = sorted(set(words))
classes = sorted(set(classes))
pickle.dump(words, open("words.pkl", "wb"))
pickle.dump(classes, open("classes.pkl", "wb"))
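# Convert each document into a fixed-length binary bag-of-words vector paired
# with a one-hot encoding of its intent tag.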
training = []
output_empty = [0] * len(classes)
for word_list, tag in documents:
    # Lemmatize and lowercase the pattern's tokens to match the vocabulary,
    # then build a binary bag-of-words vector over it.
    word_patterns = [lemmatizer.lemmatize(word.lower()) for word in word_list]
    bag = [1 if word in word_patterns else 0 for word in words]
    # One-hot encode the intent tag.
    output_row = list(output_empty)
    output_row[classes.index(tag)] = 1
    training.append((bag, output_row))
random.shuffle(training)
# np.array() on a list of (bag, output_row) pairs of unequal lengths would
# create a ragged object array (an error on recent NumPy), so build the
# input and target matrices separately.
train_x = np.array([bag for bag, _ in training])
train_y = np.array([output_row for _, output_row in training])
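# A small feed-forward classifier: bag-of-words vector in, softmax over
# intent tags out, with dropout between layers to curb overfitting.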
model = Sequential()
model.add(Input(shape=(len(train_x[0]),)))
model.add(Dense(128, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(64, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation="softmax"))
# `lr` and `decay` were removed from Keras's SGD; `learning_rate` is the
# current argument (time-based decay would need a LearningRateSchedule).
sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(loss="categorical_crossentropy", optimizer=sgd, metrics=["accuracy"])
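# Train on the full (small) dataset; verbose=1 prints per-epoch loss/accuracy.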
hist = model.fit(train_x, train_y, epochs=200, batch_size=5, verbose=1)
# model.save() takes no history argument, and newer Keras requires a
# .keras or .h5 filepath extension.
model.save("chatbot_model.h5")
print("Model done!")