attention_test.py
from numpy import array
from numpy import random
from scipy.special import softmax
# encoder representations of four different words
word_1 = array([1, 0, 0])
word_2 = array([0, 1, 0])
word_3 = array([1, 1, 0])
word_4 = array([0, 0, 1])
# stacking the word embeddings into a single array
words = array([word_1, word_2, word_3, word_4])
# generating the weight matrices
random.seed(42)
W_Q = random.randint(3, size=(3, 3))
W_K = random.randint(3, size=(3, 3))
W_V = random.randint(3, size=(3, 3))
# generating the queries, keys and values
Q = words @ W_Q
K = words @ W_K
V = words @ W_V
# scoring the query vectors against all key vectors
scores = Q @ K.transpose()
# computing the weights by a softmax over the scores, scaled by the square root of the key dimension
weights = softmax(scores / K.shape[1] ** 0.5, axis=1)
# computing the attention by a weighted sum of the value vectors
attention = weights @ V
print(attention)
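# A quick sanity check (an illustrative sketch, not part of the computation above):
# each row of the softmax weights should sum to 1, and the attention output should
# hold one 3-dimensional context vector per input word, i.e. a 4 x 3 array.
assert weights.shape == (4, 4) and attention.shape == (4, 3)
print(weights.sum(axis=1))  # expected to be close to 1.0 for every row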