This repository has been archived by the owner on Feb 1, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
eval_rte.py
79 lines (55 loc) · 2.2 KB
/
eval_rte.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Evaluate system's entailment predictions against human entailment judgements,
printing the accuracy score.
"""
# EM - 4/11/11
from xml.etree.cElementTree import iterparse
def evaluate(ref_fname, pred_fname):
    """
    Score entailment predictions against reference entailment judgements.

    ref_fname:  reference data, handed unchanged to parse_reference().
    pred_fname: system predictions, handed unchanged to parse_predictions().

    Return the accuracy as a float: the number of reference pairs whose
    predicted label equals the reference label, divided by the number of
    reference pairs.

    Raises:
        AssertionError: the two inputs contain a different number of pairs.
        KeyError: a pair id from the reference has no prediction.
        ZeroDivisionError: both inputs contain no pairs at all.
    """
    ref_id2label = parse_reference(ref_fname)
    pred_id2label = parse_predictions(pred_fname)

    if len(ref_id2label) != len(pred_id2label):
        # Raised explicitly instead of through an ``assert`` statement so the
        # check is not stripped under ``python -O``; the exception type is
        # kept for callers that already rely on it.
        raise AssertionError(
            "reference has %d pairs but predictions have %d"
            % (len(ref_id2label), len(pred_id2label)))

    # Count matches directly; no need to materialise the list of correct ids.
    # ``items()`` behaves the same on Python 2 and 3.
    num_correct = sum(
        1
        for pair_id, label in ref_id2label.items()
        if pred_id2label[pair_id] == label
    )
    return num_correct / float(len(ref_id2label))
def parse_reference(inf):
    """
    Collect the reference entailment labels from a data file in RTE xml
    format.

    The input is streamed through ``iterparse`` and every element whose tag
    is "pair" contributes one entry.

    inf: the source handed unchanged to ``iterparse``.

    Return a dict mapping each pair's "id" attribute to its "entailment"
    attribute (None when a pair carries no such attribute). If an id occurs
    more than once, the last occurrence wins.
    """
    return {
        node.get("id"): node.get("entailment")
        for _event, node in iterparse(inf)
        if node.tag == "pair"
    }
def parse_predictions(inf):
    """
    Read entailment predictions from a file in RTE output format.

    inf: either a filename (opened and closed here) or an object with a
         ``read()`` method returning the whole text (left open for the
         caller).

    The first line of the input is skipped. Each remaining non-blank line
    must consist of exactly two whitespace-separated fields: the pair id and
    the predicted label. The order of the lines (the ranking) is ignored,
    because Average Precision is not computed.

    Return a dict mapping pair id to entailment prediction.

    Raises:
        ValueError: a prediction line does not have exactly two fields.
    """
    try:
        string_types = basestring  # Python 2: matches both str and unicode
    except NameError:
        string_types = str  # Python 3 has no basestring

    if isinstance(inf, string_types):
        # We opened the file, so we are responsible for closing it.
        with open(inf) as handle:
            text = handle.read()
    else:
        text = inf.read()

    id2label = {}
    # Strip surrounding whitespace first so that leading blank lines do not
    # count as the header, then drop the header line itself.
    for line in text.strip().split("\n")[1:]:
        fields = line.split()
        if not fields:
            # Tolerate blank lines between predictions.
            continue
        if len(fields) != 2:
            raise ValueError(
                "malformed prediction line (expected '<id> <label>'): %r"
                % line)
        pair_id, label = fields
        id2label[pair_id] = label
    return id2label
if __name__ == "__main__":
    # Command-line entry point: read the reference and prediction file names
    # from the command line and print the accuracy with four decimals.
    import argparse

    cli = argparse.ArgumentParser(description=__doc__)

    # (destination name, placeholder shown in usage, help text)
    positionals = (
        ("ref_fname",
         "REFERENCE",
         "data file in RTE xml format including entailment judgements"),
        ("pred_fname",
         "PREDICTION",
         "file containing entailment predictions in RTE output format"),
    )
    for dest, placeholder, description in positionals:
        cli.add_argument(dest, metavar=placeholder, help=description)

    options = cli.parse_args()
    accuracy = evaluate(options.ref_fname, options.pred_fname)
    # Parenthesised single expression: prints the same text whether ``print``
    # is a statement or a function.
    print("Accuracy = %.4f" % accuracy)