eval.py
"""Evaluate learned word representation on word similarity task"""
import logging
import argparse
import numpy as np
import pandas as pd
from fasttext import load_model
from scipy.stats import spearmanr
from tqdm import tqdm
from convert_subchar import convert_graphical as graphical
from convert_subchar import convert_wubi as wubi
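# Input format (inferred from the line.split() call below): each line of the
# evaluation file holds three whitespace-separated fields -- word_1, word_2,
# and a human similarity judgement, e.g. "学生 老师 6.5" (illustrative values).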
def main():
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s %(message)s')
    parser = argparse.ArgumentParser(description='Evaluate word representations')
    parser.add_argument('--input', type=str, default='wordsim-296.txt',
                        help='Evaluation data path')
    parser.add_argument('--model_path', type=str, default='model.bin',
                        help='Trained model path')
    parser.add_argument('--subword', type=str, default='character',
                        choices=['character', 'graphical', 'wubi'],
                        help='Convert evaluation words to subcharacter units if needed')
    parser.add_argument('--output', type=str, default='result.txt',
                        help='Path to save the word pairs and their predicted cosine similarity scores')
    args = parser.parse_args()

    logging.info('Start evaluation for {} data...'.format(args.subword))
    model = load_model(args.model_path)
    human_score = []
    result_list = []  # Store (word_1, word_2, similarity) for error analysis
    with open(args.input, 'r') as eval_data:
        for line in tqdm(eval_data):
            word1, word2, human = line.split()
            if args.subword == 'wubi':
                w1, w2 = wubi(word1), wubi(word2)
            elif args.subword == 'graphical':
                w1, w2 = graphical(word1), graphical(word2)
            else:  # 'character': evaluate on the original words
                w1, w2 = word1, word2
            # Compute cosine similarity between the two embeddings
            emb1, emb2 = model[w1], model[w2]
            pred = np.dot(emb1, emb2) / (np.linalg.norm(emb1) * np.linalg.norm(emb2))
            human_score.append(float(human))  # gold similarity judgement
            result_list.append((word1, word2, pred))

    # Save the word pairs and their predicted scores
    result = pd.DataFrame(result_list, columns=['word_1', 'word_2', 'pred_score'])
    result.to_csv(args.output, sep='\t', index=False)

    # Compute the Spearman rank correlation between human and predicted scores
    pred_score = [i[-1] for i in result_list]
    corr, pval = spearmanr(human_score, pred_score)
    logging.info('Finished evaluation on dataset {}. '
                 'Spearman correlation = {:.4f} (p = {:.3g})'.format(args.input, corr, pval))
    logging.info('Done')


if __name__ == '__main__':
    main()
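
For a quick sanity check outside the command line, the same cosine computation can be run interactively. A minimal sketch, assuming a trained fastText model at model.bin (the path and the word pair below are placeholders); model[word] returns the word's vector via the official fasttext Python bindings, falling back to subword n-grams for out-of-vocabulary words:

import numpy as np
from fasttext import load_model

model = load_model('model.bin')   # placeholder path; point at your trained model
emb1 = model['国王']               # hypothetical query words
emb2 = model['王后']
cosine = np.dot(emb1, emb2) / (np.linalg.norm(emb1) * np.linalg.norm(emb2))
print('cosine similarity:', cosine)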