evaluate_classification_accuracy.py
import numpy as np
from sklearn.svm import LinearSVC
from scipy.spatial.distance import cdist
import keras
import sys, argparse, pickle, os.path
from collections import OrderedDict
import utils
from datasets import get_data_generator
from class_hierarchy import ClassHierarchy
from learn_labelembedding import labelembed_loss
METRICS = ['Accuracy', 'Top-5 Accuracy', 'Avg. Accuracy', 'Hierarchical Accuracy']
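
# The functions below provide three ways of obtaining class predictions
# (linear SVM on extracted features, nearest-centroid search, and direct
# network output) and a common evaluation of flat, balanced, and
# hierarchical accuracy.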

def train_and_predict(data, model, layer = None, normalize = False, augmentation_epochs = 1, C = 1.0, custom_objects = {}, batch_size = 1):
    """ Extracts image features, trains a linear SVM for classification, and returns predictions on the test data. """
    
    # Load model
    if isinstance(model, str):
        model = keras.models.load_model(model, custom_objects = custom_objects, compile = False)
    if layer is not None:
        model = keras.models.Model(model.inputs[0], model.layers[layer].output if isinstance(layer, int) else model.get_layer(layer).output)
    
    # Extract features
    sys.stderr.write('Extracting features...\n')
    X_train = model.predict_generator(data.flow_train(10, False, shuffle = False, augment = augmentation_epochs > 1), augmentation_epochs * (data.num_train // 10), verbose = 1)
    X_test = model.predict_generator(data.flow_test(batch_size, False, shuffle = False, augment = False), data.num_test // batch_size, verbose = 1)
    if normalize:
        X_train /= np.linalg.norm(X_train, axis = -1, keepdims = True)
        X_test /= np.linalg.norm(X_test, axis = -1, keepdims = True)
    else:
        X_max = np.abs(X_train).max(axis = 0, keepdims = True)
        X_train /= np.maximum(1e-8, X_max)
        X_test /= np.maximum(1e-8, X_max)
    
    # Train SVM
    sys.stderr.write('Training SVM...\n')
    svm = LinearSVC(C = C, verbose = 1)
    svm.fit(X_train, np.tile(data.labels_train, augmentation_epochs))
    
    # Predict test classes
    sys.stderr.write('\nPredicting and evaluating...\n')
    return svm.decision_function(X_test).argsort(axis = -1)[:,::-1]
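
# Illustrative usage sketch (not part of the original script). `data` would be a
# generator obtained from get_data_generator(); the dataset name, data root,
# model path, and layer name are hypothetical placeholders.
#
#   data = get_data_generator('CIFAR-100', '/path/to/data')
#   pred = train_and_predict(data, 'model.h5', layer = 'embedding', normalize = True, C = 0.1)
#   print_performance({ 'my-model' : evaluate(pred, data, hierarchy = None) })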

def nn_classification(data, centroids, model, layer = None, custom_objects = {}, batch_size = 1):
    """ Extracts image embeddings and performs classification by assigning samples to the class of the nearest embedding. """
    
    # Load class centroids
    if isinstance(centroids, str):
        with open(centroids, 'rb') as f:
            centroids = pickle.load(f)['embedding']
    
    # Load model
    if isinstance(model, str):
        model = keras.models.load_model(model, custom_objects = custom_objects, compile = False)
    if layer is not None:
        model = keras.models.Model(model.inputs[0], model.layers[layer].output if isinstance(layer, int) else model.get_layer(layer).output)
    
    # Extract features
    sys.stderr.write('Extracting features...\n')
    feat = model.predict_generator(data.flow_test(batch_size, False, shuffle = False, augment = False), data.num_test // batch_size, verbose = 1)
    
    # Classify
    sys.stderr.write('Searching for nearest class centroids...\n')
    return cdist(feat, centroids, 'sqeuclidean').argsort(axis = -1)
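
# Illustrative usage sketch (file names are hypothetical). The pickle dump is
# expected to contain a dictionary with an 'embedding' item holding one centroid
# per class, in the same order as the classes predicted by the model.
#
#   pred = nn_classification(data, 'class_embeddings.pickle', 'model.h5', layer = 'embedding')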

def extract_predictions(data, model, layer = None, custom_objects = {}, batch_size = 1):
    """ Extracts class predictions. """
    
    # Load model
    if isinstance(model, str):
        model = keras.models.load_model(model, custom_objects = custom_objects, compile = False)
    if layer is not None:
        model = keras.models.Model(model.inputs[0], model.layers[layer].output if isinstance(layer, int) else model.get_layer(layer).output)
    
    # Extract predictions
    sys.stderr.write('Predicting and evaluating...\n')
    return model.predict_generator(data.flow_test(batch_size, False, shuffle = False, augment = False), data.num_test // batch_size, verbose = 1).argsort(axis = -1)[:,::-1]
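
# Illustrative usage sketch: used when the model's output already provides class
# scores (cf. the --prob_features flag below). 'model.h5' and 'prob' are a
# hypothetical model path and layer name.
#
#   pred = extract_predictions(data, 'model.h5', layer = 'prob')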

def evaluate(y_pred, data_generator, hierarchy):
    """ Computes flat, balanced, and (optionally) hierarchical accuracy for given predictions. """
    
    perf = OrderedDict()
    y_true = np.asarray(data_generator.labels_test)
    
    if y_pred.ndim == 2:
        perf['Top-5 Accuracy'] = np.mean(np.any(y_pred[:,:5] == y_true[:,None], axis = -1))
        y_pred = y_pred[:,0]
    
    perf['Accuracy'] = np.mean(y_pred == y_true)
    
    class_freq = np.bincount(y_true)
    perf['Avg. Accuracy'] = ((y_pred == y_true).astype(float) / class_freq[y_true]).sum() / len(class_freq)
    
    if hierarchy is not None:
        perf['Hierarchical Accuracy'] = 0.0
        for yp, yt in zip(y_pred, y_true):
            perf['Hierarchical Accuracy'] += 1.0 - hierarchy.lcs_height(data_generator.classes[int(yp)], data_generator.classes[int(yt)])
        perf['Hierarchical Accuracy'] /= len(y_true)
    
    return perf
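
# Illustrative sketch (not part of the original script): evaluate() returns an
# OrderedDict mapping metric names to values, with keys 'Top-5 Accuracy' (only
# for 2-d predictions), 'Accuracy', 'Avg. Accuracy', and 'Hierarchical Accuracy'
# (only if a hierarchy is given), which print_performance() lays out as a table.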

def print_performance(perf, metrics = METRICS):
    
    print()
    
    # Print header
    max_name_len = max(len(lbl) for lbl in perf.keys())
    print(' | '.join([' ' * max_name_len] + ['{:^6s}'.format(metric) for metric in metrics]))
    print('-' * (max_name_len + sum(3 + max(6, len(metric)) for metric in metrics)))
    
    # Print result rows
    for lbl, results in perf.items():
        print('{:{}s} | {}'.format(lbl, max_name_len, ' | '.join('{:>{}.4f}'.format(results[metric], max(len(metric), 6)) if metric in results else '{:>{}s}'.format('--', max(len(metric), 6)) for metric in metrics)))
    
    print()

def str2bool(v):
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    elif v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    else:
        raise argparse.ArgumentTypeError('Boolean value expected.')

if __name__ == '__main__':

    parser = argparse.ArgumentParser(description = 'Evaluates flat, balanced, and hierarchical accuracy of several models.', formatter_class = argparse.ArgumentDefaultsHelpFormatter)
    arggroup = parser.add_argument_group('Dataset')
    arggroup.add_argument('--dataset', type = str, required = True, help = 'Training dataset. See README.md for a list of available datasets.')
    arggroup.add_argument('--data_root', type = str, required = True, help = 'Root directory of the dataset.')
    arggroup.add_argument('--hierarchy', type = str, default = None, help = 'Path to a file containing parent-child relationships (one per line). Used for evaluating hierarchical accuracy.')
    arggroup.add_argument('--is_a', action = 'store_true', default = False, help = 'If given, --hierarchy is assumed to contain is-a instead of parent-child relationships.')
    arggroup.add_argument('--str_ids', action = 'store_true', default = False, help = 'If given, class IDs are treated as strings instead of integers.')
    arggroup.add_argument('--classes_from', type = str, default = None, help = 'Optionally, a path to a pickle dump containing a dictionary with item "ind2label" specifying the classes to be considered. These should be in the same order as the classes predicted by the model.')
    arggroup.add_argument('--augmentation_epochs', type = int, default = 1, help = 'Number of training image augmentations when training an SVM on top of embeddings.')
    arggroup.add_argument('--C', type = float, default = 0.1, help = 'Weight of the error in SVM loss.')
    arggroup.add_argument('--batch_size', type = int, default = 1, help = 'Batch size for feature extraction. Must divide the number of test images evenly.')
    arggroup = parser.add_argument_group('Features')
    arggroup.add_argument('--architecture', type = str, default = 'simple', choices = utils.ARCHITECTURES, help = 'Type of network architecture.')
    arggroup.add_argument('--model', type = str, action = 'append', required = True, help = 'Path to a keras model dump used for extracting image features.')
    arggroup.add_argument('--layer', type = str, action = 'append', required = True, help = 'Name or index of the layer to extract features from.')
    arggroup.add_argument('--label', type = str, action = 'append', help = 'Label for the corresponding features.')
    arggroup.add_argument('--norm', type = str2bool, action = 'append', help = 'Whether to L2-normalize the corresponding features or not (defaults to False).')
    arggroup.add_argument('--prob_features', type = str2bool, action = 'append', help = 'Whether to use the extracted features as class probabilities instead of training an SVM.')
    arggroup.add_argument('--centroids', type = str, action = 'append', help = 'Optionally, a pickle dump containing a dictionary with an item "embedding" referring to a numpy array of class centroids for performing nearest-neighbor classification.')
    args = parser.parse_args()
    
    # Load dataset
    if args.classes_from:
        with open(args.classes_from, 'rb') as f:
            embed_labels = pickle.load(f)['ind2label']
    else:
        embed_labels = None
    data_generator = get_data_generator(args.dataset, args.data_root, classes = embed_labels)
    
    # Load class hierarchy
    id_type = str if args.str_ids else int
    hierarchy = ClassHierarchy.from_file(args.hierarchy, is_a_relations = args.is_a, id_type = id_type) if args.hierarchy else None
    
    # Learn SVM classifier on training data and evaluate on test data
    custom_objects = utils.get_custom_objects(args.architecture)
    custom_objects['labelembed_loss'] = labelembed_loss
    perf = OrderedDict()
    for i, model in enumerate(args.model):
        model_name = args.label[i] if (args.label is not None) and (i < len(args.label)) else os.path.splitext(os.path.basename(model))[0]
        if (args.layer is not None) and (i < len(args.layer)):
            try:
                layer = int(args.layer[i])
            except ValueError:
                layer = args.layer[i]
        else:
            layer = None
        normalize = args.norm[i] if (args.norm is not None) and (i < len(args.norm)) else False
        prob_features = args.prob_features[i] if (args.prob_features is not None) and (i < len(args.prob_features)) else False
        centroids = args.centroids[i] if (args.centroids is not None) and (i < len(args.centroids)) else ''
        sys.stderr.write('-- {} --\n'.format(model_name))
        if prob_features:
            pred = extract_predictions(data_generator, model, layer, custom_objects, args.batch_size)
        elif centroids:
            pred = nn_classification(data_generator, centroids, model, layer, custom_objects, args.batch_size)
        else:
            pred = train_and_predict(data_generator, model, layer, normalize, args.augmentation_epochs, args.C, custom_objects, args.batch_size)
        perf[model_name] = evaluate(pred, data_generator, hierarchy)
    
    # Show results
    print_performance(perf)
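
# Example invocation (an illustrative sketch; the dataset name, hierarchy file,
# architecture, model paths, layer names, and labels below are placeholders and
# must match your own setup):
#
#   python evaluate_classification_accuracy.py \
#          --dataset CIFAR-100 --data_root /path/to/cifar \
#          --hierarchy /path/to/class_hierarchy.txt \
#          --architecture resnet-110 \
#          --model embedding_model.h5 --layer embedding --label "embedding + SVM" --norm true \
#          --model softmax_model.h5 --layer prob --label "softmax" --prob_features true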