post_extraction.py
#!/usr/bin/env python
import utils as ut
import numpy as np
from scipy import stats
import latextabs as lt
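
# `utils` and `latextabs` are presumably local helper modules of this
# repository: `utils` provides JSON loading (json2object below), while
# `latextabs` emits the LaTeX table fragments written to results/.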
# Parameters
used_test = stats.wilcoxon
used_p = 0.05
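
# Pairwise comparison uses the Wilcoxon signed-rank test; a p-value above
# `used_p` is interpreted as "no significant difference" (dependency == True).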
# Load results
legend = ut.json2object("results/legend.json")
datasets = legend["datasets"]
classifiers = legend["classifiers"]
metrics = legend["metrics"]
folds = legend["folds"]
rescube = np.load("results/rescube.npy")
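
# Inferred from the indexing below, rescube has shape
# (n_datasets, n_classifiers, n_metrics, n_folds).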
# Storage for ranks
ranks = np.zeros((len(metrics), len(datasets), len(classifiers)))
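# Axes: (metric, dataset, classifier). Note that rankdata assigns larger rank
# values to larger scores, so for gain-type metrics such as accuracy the best
# classifier takes the highest rank.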
# First generate tables for each metric
for mid, metric in enumerate(metrics):
    print("Metric %s [%i]" % (metric, mid))
    table_file = open("results/tab_%s.tex" % metric, "w")
    table_file.write(lt.header4classifiers(classifiers))
    for did, dataset in enumerate(datasets):
        print("| Dataset %10s [%i]" % (dataset, did))
        dataset = dataset.replace("_", "-")
        # Subtable is 2d (clf, fold)
        subtable = rescube[did, :, mid, :]
        # Skip datasets for which the metric produced NaN scores
        if np.isnan(subtable).any():
            print("Invalid")
            continue
        # Scores as mean over folds
        scores = np.mean(subtable, axis=1)
        stds = np.std(subtable, axis=1)
        # Ranks over classifiers; ties share the average rank
        rank = stats.rankdata(scores, method="average")
        ranks[mid, did] = rank
        # Pairwise dependency matrix: True where the test finds no
        # significant difference between two classifiers
        dependency = np.zeros((len(classifiers), len(classifiers)))
        for cida, clf_a in enumerate(classifiers):
            a = subtable[cida]
            for cid, clf in enumerate(classifiers):
                b = subtable[cid]
                test = used_test(a, b, zero_method="zsplit")
                dependency[cida, cid] = test.pvalue > used_p
        print(dependency)
        print(scores)
        print(stds)
        # Emit the value row and the statistical-dependency row for this dataset
        table_file.write(lt.row(dataset, scores, stds))
        table_file.write(lt.row_stats(dataset, dependency, scores, stds))
    table_file.write(lt.footer("Results for %s metric" % metric))
    table_file.close()
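
# Second pass: global comparison across datasets. For each metric, classifiers
# are summarized by their mean rank over datasets, and pairwise Wilcoxon tests
# are run on the per-dataset rank vectors. Datasets skipped as invalid above
# keep their initial zero ranks, which lowers the mean ranks uniformly.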
for i, metric in enumerate(metrics):
    table_file = open("results/tab_%s_mean_ranks.tex" % metric, "w")
    table_file.write(lt.header4classifiers_ranks(classifiers))
    # Pairwise dependency computed over the per-dataset ranks of each classifier
    dependency2 = np.zeros((len(classifiers), len(classifiers)))
    for cida in range(len(classifiers)):
        a = ranks[i].T[cida]
        for cid in range(len(classifiers)):
            b = ranks[i].T[cid]
            test = used_test(a, b, zero_method="zsplit")
            dependency2[cida, cid] = test.pvalue > used_p
    print(dependency2)
    print(np.mean(ranks[i], axis=0))
    # Emit the mean-ranks row and its statistical-dependency row; standard
    # deviations are not applicable here, so a zero vector is passed.
    # NB: the row label reuses the last `dataset` value from the loop above.
    table_file.write(lt.row_ranks(np.mean(ranks[i], axis=0)))
    table_file.write(lt.row_stats(dataset, dependency2,
                                  np.mean(ranks[i], axis=0),
                                  np.zeros(len(classifiers))))
    table_file.write(
        lt.footer("Results for mean ranks according to %s metric" % metric))
    table_file.close()
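
# The two passes leave per-metric fragments in results/: tab_<metric>.tex and
# tab_<metric>_mean_ranks.tex, presumably intended to be \input into a paper.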