-
Notifications
You must be signed in to change notification settings - Fork 24
/
all_wordsim.py
30 lines (26 loc) · 1.19 KB
/
all_wordsim.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import sys
import os
from read_write import read_word_vectors
from ranking import *
if __name__ == '__main__':
    # Command-line entry point.
    # Usage: all_wordsim.py <word_vec_file> <word_sim_dir>
    #
    # Each file in <word_sim_dir> is read as whitespace-separated
    # "word1 word2 score" lines. For every file one table row is printed:
    # the number of pairs read, the number of pairs skipped because at least
    # one word is missing from the loaded vectors, and the value returned by
    # spearmans_rho() for the ranked file scores vs. the ranked cosine
    # similarities of the word vectors.
    if len(sys.argv) < 3:
        # Fail with a readable message instead of a bare IndexError.
        sys.exit('usage: %s <word_vec_file> <word_sim_dir>' % sys.argv[0])

    word_vec_file = sys.argv[1]
    word_sim_dir = sys.argv[2]
    word_vecs = read_word_vectors(word_vec_file)

    rule = '================================================================================='
    # Each print below receives one pre-formatted string, so the output is
    # the same whether the interpreter treats print as a statement
    # (Python 2) or as a function (Python 3).
    print(rule)
    print('%6s %20s %15s %15s %15s'
          % ('Serial', 'Dataset', 'Num Pairs', 'Not found', 'Rho'))
    print(rule)

    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # serial numbers and row order stable across runs and platforms.
    for serial, filename in enumerate(sorted(os.listdir(word_sim_dir)), 1):
        manual_dict, auto_dict = {}, {}
        not_found, total_size = 0, 0

        # The context manager closes the dataset file even if a line fails
        # to parse.
        with open(os.path.join(word_sim_dir, filename), 'r') as sim_file:
            for line in sim_file:
                fields = line.strip().lower().split()
                if not fields:
                    # Blank (e.g. trailing) lines carry no pair; skip them
                    # rather than crash on the unpack below.
                    continue
                # Any other malformed line still raises ValueError here, so
                # broken datasets are not silently accepted.
                word1, word2, val = fields
                if word1 in word_vecs and word2 in word_vecs:
                    manual_dict[(word1, word2)] = float(val)
                    auto_dict[(word1, word2)] = cosine_sim(word_vecs[word1],
                                                           word_vecs[word2])
                else:
                    not_found += 1
                # Counts every pair read, whether or not it was found.
                total_size += 1

        rho = spearmans_rho(assign_ranks(manual_dict), assign_ranks(auto_dict))
        print('%6s %20s %15s %15s %15.4f'
              % (serial, filename, total_size, not_found, rho))