-
Notifications
You must be signed in to change notification settings - Fork 100
/
Copy pathevaluate.py
80 lines (59 loc) · 2.34 KB
/
evaluate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from predict import *
def evaluate(result, summary=False):
    """Print per-label and aggregate precision/recall/F1 for predictions.

    Args:
        result: Iterable of 3-item records ``(_, y0, y1)``. The first item
            is ignored, ``y0`` is the actual value and ``y1`` the prediction.
            When the module-level ``HRE`` flag is truthy, ``y0`` and ``y1``
            are treated as a single label pair; otherwise they are iterated
            in lockstep as parallel sequences of labels.
        summary: When true, skip the per-label report and print only the
            macro/micro aggregates.

    Returns:
        None. All metrics are written to stdout. If no label was counted
        (e.g. ``result`` is empty) nothing is printed.
    """
    # Materialize once: the records are scanned here and may be scanned again
    # by evaluate_iob_classification() below.
    result = list(result)
    avg = defaultdict(float)  # aggregate scores
    tp = defaultdict(int)  # true positives per label
    tpfn = defaultdict(int)  # true positives + false negatives per label
    tpfp = defaultdict(int)  # true positives + false positives per label

    for _, y0, y1 in result:  # actual value, prediction
        # One label pair per record under HRE, otherwise one pair per position.
        pairs = ((y0, y1),) if HRE else zip(y0, y1)
        for actual, predicted in pairs:
            tp[actual] += (actual == predicted)
            tpfn[actual] += 1
            tpfp[predicted] += 1

    if not tpfn:
        # Nothing was counted, so the averages below would divide by zero.
        return

    print()
    for y in sorted(tpfn):
        pr = (tp[y] / tpfp[y]) if tpfp[y] else 0
        rc = (tp[y] / tpfn[y]) if tpfn[y] else 0
        avg["macro_pr"] += pr
        avg["macro_rc"] += rc
        if not summary:
            print("label = %s" % y)
            print("precision = %f (%d/%d)" % (pr, tp[y], tpfp[y]))
            print("recall = %f (%d/%d)" % (rc, tp[y], tpfn[y]))
            print("f1 = %f\n" % f1(pr, rc))

    # Macro scores average over the labels that occur among the actual values.
    avg["macro_pr"] /= len(tpfn)
    avg["macro_rc"] /= len(tpfn)
    avg["micro_f1"] = sum(tp.values()) / sum(tpfn.values())

    print("macro precision = %f" % avg["macro_pr"])
    print("macro recall = %f" % avg["macro_rc"])
    print("macro f1 = %f" % f1(avg["macro_pr"], avg["macro_rc"]))
    print("micro f1 = %f" % avg["micro_f1"])

    if TASK in ("word-classification", "word-segmentation"):
        evaluate_iob_classification(result)
def evaluate_iob_classification(result):
    """Print segment-level precision/recall/F1 for IOB-style tag sequences.

    A tag counts as a segment boundary when it has a ``B``/``S`` marker as a
    hyphenated prefix (``"B-x"``, ``"S-x"``) or suffix (``"x-B"``, ``"x-S"``).
    A predicted segment is a true positive when the span between two
    consecutive boundaries shared by both sequences is tagged identically.

    Args:
        result: Iterable of 3-item records ``(_, Y0, Y1)``. The first item is
            ignored; ``Y0`` and ``Y1`` are lists of tag strings holding the
            actual and the predicted tags respectively.

    Returns:
        None. Metrics are written to stdout; nothing is printed when either
        sequence set contains no boundary tag at all.
    """
    tp, tpfn, tpfp = 0, 0, 0
    boundary = re.compile(r"^[BS]-|-[BS]$")

    def isbs(tag):
        return boundary.search(tag)

    # Sentinel appended to both sequences so the last segment gets closed.
    # It must itself match the boundary pattern (a bare "B" does not).
    sentinel = ["B-"]

    for _, Y0, Y1 in result:
        tpfn += sum(1 for tag in Y0 if isbs(tag))
        tpfp += sum(1 for tag in Y1 if isbs(tag))
        i = 0  # start of the current span (last boundary shared by both)
        for j, (y0, y1) in enumerate(zip(Y0 + sentinel, Y1 + sentinel)):
            if j and isbs(y0) and isbs(y1):
                # Only spans opened by a boundary tag are segments; a leading
                # run of non-boundary tags must not be credited as a match.
                if isbs(Y0[i]) and Y0[i:j] == Y1[i:j]:
                    tp += 1
                i = j

    if not (tpfp and tpfn):
        return

    print()
    print("TASK = %s in IOB format" % TASK)
    print("precision = %f (%d/%d)" % (tp / tpfp, tp, tpfp))
    print("recall = %f (%d/%d)" % (tp / tpfn, tp, tpfn))
    print("f1 = %f" % f1(tp / tpfp, tp / tpfn))
if __name__ == "__main__":
    # Command line: four model-related paths followed by the test data path.
    argv = sys.argv
    if len(argv) != 6:
        sys.exit("Usage: %s model char_to_idx word_to_idx tag_to_idx test_data" % argv[0])
    model_args = argv[1:5]
    test_data = argv[5]
    evaluate(predict(*load_model(model_args), test_data))