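"""Analyzer for the FCNet tabular benchmarks.

Enumerates every configuration in a benchmark's grid and computes confidence
intervals for each configuration's mean validation/test performance.
"""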
import copy
import itertools
import numpy as np
import scipy.stats as stats
import pandas as pd
from tabular_benchmarks import FCNetSliceLocalizationBenchmark
from tabular_benchmarks import FCNetProteinStructureBenchmark
from tabular_benchmarks import FCNetNavalPropulsionBenchmark
from tabular_benchmarks import FCNetParkinsonsTelemonitoringBenchmark
from tabular_benchmarks import nas_cifar10
from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.configuration_space import Configuration
from ConfigSpace.hyperparameters import CategoricalHyperparameter
from ConfigSpace.hyperparameters import OrdinalHyperparameter
import helper
from helper import Progress
from benchmark_analyzer import Benchmark


class FCNetBenchmark(Benchmark):
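    """Confidence-interval analysis of an FCNet tabular benchmark.

    Enumerates the full configuration grid of the chosen dataset, collects
    the recorded objective values for the requested dataset splits, and
    computes a confidence interval for each configuration's mean performance.
    """
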
    def __init__(self,
                 data_dir='./fcnet_tabular_benchmarks/',
                 dataset='slice_localization',
                 confidence_interval='student-t',
                 confidence_level=0.95,
                 dataset_type=['validation', 'test'],
                 **kwargs):
        self._validate_confidence_level(confidence_level)
        self._validate_confidence_interval(confidence_interval)
        self._validate_dataset_type(dataset_type)
        datasets = ['slice_localization',
                    'protein_structure',
                    'naval_propulsion',
                    'parkinsons_telemonitoring']
        if dataset == 'slice_localization':
            b = FCNetSliceLocalizationBenchmark(data_dir=data_dir)
        elif dataset == 'protein_structure':
            b = FCNetProteinStructureBenchmark(data_dir=data_dir)
        elif dataset == 'naval_propulsion':
            b = FCNetNavalPropulsionBenchmark(data_dir=data_dir)
        elif dataset == 'parkinsons_telemonitoring':
            b = FCNetParkinsonsTelemonitoringBenchmark(data_dir=data_dir)
        # The cifar10 (NAS) variants are currently disabled:
        #elif dataset in ['cifar10_A', 'cifar10']:
        #    b = nas_cifar10.NASCifar10A(data_dir, False)
        #elif dataset == 'cifar10_B':
        #    b = nas_cifar10.NASCifar10B(data_dir, False)
        #elif dataset == 'cifar10_C':
        #    b = nas_cifar10.NASCifar10C(data_dir, False)
        else:
            raise ValueError('dataset must be one of {}. '
                             'Provided {}.'.format(datasets, dataset))
        self.benchmark = b
        config_space = b.get_configuration_space()
        self.confidence_interval = confidence_interval
        self.confidence_level = confidence_level
        if isinstance(dataset_type, list):
            self.dataset_type = dataset_type
        else:
            self.dataset_type = [dataset_type]
        self._num_trials = 4
        X, y, y_bounds, samples, runtimes, hyperparameters \
            = self.get_data_dump(config_space, dataset)
        # Temporarily use only the validation data instead of the test data
        #samples = samples[:, 4:8]
        #print("Warning, we're dropping the validation data and only using "
        #      "the test data")
        self.runtimes = runtimes
        super().__init__(X, samples, hyperparameters, config_space,
                         confidence_level=confidence_level,
                         y_bounds=y_bounds,
                         scenario_name=dataset,
                         **kwargs)

    def _validate_confidence_level(self, confidence_level):
        if not (isinstance(confidence_level, float) and
                0 < confidence_level < 1):
            raise ValueError('confidence_level must be a float in (0, 1). '
                             'Provided {}.'.format(confidence_level))

    def _validate_confidence_interval(self, confidence_interval):
        options = ['student-t',
                   'bootstrap',
                   'nested student-t bootstrap',
                   'nested bootstrap']
        if not (isinstance(confidence_interval, str) and
                confidence_interval in options):
            raise ValueError('confidence_interval must be in {}. '
                             'Provided {}.'.format(options,
                                                   confidence_interval))

    def _validate_dataset_type(self, dataset_type):
        # Keep a copy of the original argument so the error message reports
        # exactly what the caller passed in.
        original = copy.deepcopy(dataset_type)
        if not isinstance(dataset_type, list):
            dataset_type = [dataset_type]
        for d in dataset_type:
            if d not in ['train', 'validation', 'test']:
                raise ValueError('dataset_type must be a subset of '
                                 '["train", "validation", "test"]. '
                                 'Provided {}.'.format(original))

    def get_data_dump(self, config_space, dataset):
        """
        Enumerate the full configuration grid, gather the raw benchmark
        samples, compute confidence intervals, and return all of it.
        """
        value_lists = []
        name_list = []
        for hp in config_space.get_hyperparameters():
            name_list.append(hp.name)
            if isinstance(hp, OrdinalHyperparameter):
                values = hp.sequence
            elif isinstance(hp, CategoricalHyperparameter):
                values = hp.choices
            else:
                raise NotImplementedError('Only CategoricalHyperparameter '
                                          'and OrdinalHyperparameter are '
                                          'supported at the moment. '
                                          'Provided {}.'.format(type(hp)))
            value_lists.append(list(values))
        value_grid = list(itertools.product(*value_lists))
        config_ids = []
        configurations = []
        progress = Progress(len(value_grid),
                            '{}% done enumerating all configurations')
        for values in value_grid:
            progress.update()
            values = dict(zip(name_list, values))
            config = Configuration(config_space, values=values)
            config_id = self.get_unique_configuration_identifier(config)
            config_ids.append(config_id)
            configurations.append(config)
        configuration_table = pd.DataFrame({'Configuration ID': config_ids,
                                            'Configuration': configurations})
        configuration_table = \
            configuration_table.drop_duplicates('Configuration ID')
        progress = Progress(len(value_grid),
                            '{}% done extracting the sample values')
        # Check to see if we can load the data samples directly
        try:
            samples = helper.loadObj('./samples/', dataset)
            runtime_samples = helper.loadObj('./samples/',
                                             'runtimes-' + dataset)
        except Exception:
            samples = []
            runtime_samples = []
            for config in configuration_table['Configuration']:
                progress.update()
                sample = []
                runtime_sample = []
                for dataset_type in self.dataset_type:
                    sample.append([])
                    runtime_sample.append([])
                    for i in range(self._num_trials):
                        result = \
                            self.benchmark.objective_function_deterministic(
                                config, index=i, dataset=dataset_type)
                        sample[-1].append(result[0])
                        runtime_sample[-1].append(result[1])
                samples.append(sample)
                runtime_samples.append(runtime_sample)
            samples = np.asarray(samples)
            runtime_samples = np.asarray(runtime_samples)
            helper.saveObj('./samples/', samples, dataset)
            helper.saveObj('./samples/', runtime_samples,
                           'runtimes-' + dataset)
            helper.saveObj('./samples/',
                           np.asarray(configuration_table['Configuration']),
                           'configurations-' + dataset)
        confidence_level = self.confidence_level
        if self.confidence_interval == 'nested student-t bootstrap':
            cis = self.confidence_interval_nested_student_bootstrap(
                samples, confidence_level)
        elif self.confidence_interval == 'nested bootstrap':
            cis = self.confidence_interval_nested_bootstrap(
                samples, confidence_level)
        elif self.confidence_interval == 'student-t':
            cis = self.confidence_interval_student(samples, confidence_level)
        elif self.confidence_interval == 'bootstrap':
            cis = self.confidence_interval_bootstrap(samples,
                                                     confidence_level)
        else:
            raise NotImplementedError('Unknown confidence interval method '
                                      '{}.'.format(self.confidence_interval))
        # With T = len(self.dataset_type) splits and R = self._num_trials
        # repeats per split, the arrays below have shapes:
        #   X:        (n_configs, n_hyperparameters)
        #   y:        (n_configs,)      mean over all T*R values
        #   y_bounds: (n_configs, 2)    [lower, upper] confidence bounds
        #   samples:  (n_configs, T*R)
        #   runtimes: (n_configs, T*R)
        X = np.array(pd.DataFrame(value_grid).infer_objects())
        y_bounds = cis
        samples = samples.reshape((len(samples), -1))
        y = np.mean(samples, axis=1)
        runtimes = runtime_samples.reshape((len(samples), -1))
        hyperparameters = name_list
        return X, y, y_bounds, samples, runtimes, hyperparameters

    def confidence_interval_nested_bootstrap(self,
                                             samples,
                                             confidence_level):
        """Nested bootstrap (percentile method) over dataset splits and
        trials."""
        n_data = len(samples)
        num_trials = self._num_trials
        num_datasets = 2
        alpha = 1 - confidence_level
        n_bootstrap = int(round(10/(alpha/2) + 1))
        # We only have two data points, one for each dataset split, so rather
        # than drawing random bootstrap samples we enumerate all of them
        idx = [0, 1]
        inner_b_idx = list(itertools.product(idx, idx))
        # Take these bootstrap samples
        b_inner_samples = samples[:, inner_b_idx, :]
        # Calculate the mean of each one and reshape so that we end up with
        # an array of length n_data, with each entry containing the list of
        # all possible sub-statistics
        b_inner_means = np.mean(b_inner_samples, axis=2).reshape((n_data, -1))
        # Calculate the outer bootstrap samples, drawing indices over all of
        # the inner sub-statistics
        outer_b_idx = np.random.randint(0, b_inner_means.shape[1],
                                        (n_bootstrap, num_trials))
        # Get the outer bootstrap samples
        b_outer_samples = b_inner_means[:, outer_b_idx]
        # And calculate the statistics for them
        b_outer_means = np.mean(b_outer_samples, axis=2)
        # Calculate the confidence interval
        lower, upper = np.quantile(b_outer_means,
                                   [alpha/2, 1-alpha/2],
                                   axis=1,
                                   interpolation='nearest')
        return np.array([lower, upper]).transpose()

    def confidence_interval_nested_student_bootstrap(self,
                                                     samples,
                                                     confidence_level):
        """Studentized (bootstrap-t) variant of the nested bootstrap; see
        the note after this method for the interval formula."""
        n_data = len(samples)
        num_trials = self._num_trials
        num_datasets = 2
        alpha = 1 - confidence_level
        n_bootstrap = int(round(10/(alpha/2) + 1))
        # We only have two data points, one for each dataset split, so rather
        # than drawing random bootstrap samples we enumerate all of them
        idx = [0, 1]
        inner_b_idx = list(itertools.product(idx, idx))
        # Take these bootstrap samples
        b_inner_samples = samples[:, inner_b_idx, :]
        # Calculate the mean of each one and reshape so that we end up with
        # an array of length n_data, with each entry containing the list of
        # all possible sub-statistics
        b_inner_means = np.mean(b_inner_samples, axis=2).reshape((n_data, -1))
        # Calculate the outer bootstrap samples, drawing indices over all of
        # the inner sub-statistics
        outer_b_idx = np.random.randint(0, b_inner_means.shape[1],
                                        (n_bootstrap, num_trials))
        # Get the outer bootstrap samples
        b_outer_samples = b_inner_means[:, outer_b_idx]
        # And calculate the statistics for them
        b_outer_means = np.mean(b_outer_samples, axis=2)
        # Calculate the standard deviation of the sample statistics
        b_outer_std = np.std(b_outer_means, axis=1)
        # Calculate the standard errors for each bootstrap sample
        b_outer_std_errs = np.std(b_outer_samples, axis=2) / np.sqrt(num_trials)
        # Calculate the actual sample means
        sample_means = np.mean(np.mean(samples, axis=1), axis=1)
        # Calculate the studentized test statistics (q values), broadcasting
        # the sample means across the bootstrap samples
        t_statistics = ((b_outer_means - sample_means[:, np.newaxis])
                        / b_outer_std_errs)
        # Get the quantiles of the t_statistics.
        # When we calculate the lower bound we treat nans as negative
        # infinity. This is conservative: a nan comes from a 0/0, and
        # depending on whether you approach this limit from the positive or
        # the negative direction you get positive or negative infinity.
        # Ultimately, this means that we end up with slightly larger
        # confidence intervals, so overconfidence will not arise from poor
        # handling of nans.
        # interpolation='nearest' is necessary for confidence levels where
        # the quantiles don't correspond exactly to a single data point. Any
        # other interpolation method returns nan whenever it interpolates
        # between infinity and another number. Since the number of bootstrap
        # samples is chosen automatically so that a data point will be close
        # to the desired quantile, the lack of interpolation matters little
        # here.
        lower = np.quantile(np.nan_to_num(t_statistics,
                                          nan=-np.inf,
                                          posinf=np.inf,
                                          neginf=-np.inf),
                            alpha/2,
                            axis=1,
                            interpolation='nearest')
        # Treat nans as positive infinity. See above.
        upper = np.quantile(np.nan_to_num(t_statistics,
                                          nan=np.inf,
                                          posinf=np.inf,
                                          neginf=-np.inf),
                            1-alpha/2,
                            axis=1,
                            interpolation='nearest')
        # Now compute the final confidence interval bounds
        lower_bound = sample_means - b_outer_std*upper
        upper_bound = sample_means - b_outer_std*lower
        return np.array([lower_bound, upper_bound]).transpose()
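
    # The method above implements the studentized (bootstrap-t) interval:
    # for each bootstrap resample b the pivot is
    #     t*_b = (theta*_b - theta_hat) / se*_b,
    # and the interval is
    #     [theta_hat - sigma_hat * t*_(1-alpha/2),
    #      theta_hat - sigma_hat * t*_(alpha/2)],
    # where theta_hat is the full-sample mean, se*_b the standard error
    # within resample b, and sigma_hat the standard deviation of the
    # bootstrap statistics.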

    def confidence_interval_student(self,
                                    samples,
                                    confidence_level):
        """Student-t interval: mean +/- t_{alpha/2, n-1} * s / sqrt(n)."""
        # Treat validation and test samples identically
        samples = samples.reshape((len(samples), -1))
        ci = stats.t.interval(confidence_level,
                              samples.shape[1] - 1,
                              loc=np.mean(samples, axis=1),
                              scale=stats.sem(samples, axis=1))
        ci = np.array(ci).transpose()
        # nans occur wherever the standard deviation is 0. We fill these with
        # degenerate confidence intervals equal to the point estimate (all
        # samples are identical there, so samples[:, 0] is the mean).
        ci = pd.DataFrame(ci)
        ci[0] = ci[0].fillna(pd.Series(samples[:, 0]))
        ci[1] = ci[1].fillna(pd.Series(samples[:, 0]))
        return np.array(ci)

    def confidence_interval_bootstrap(self,
                                      samples,
                                      confidence_level):
        """Plain percentile bootstrap over all of a configuration's
        samples."""
        # Treat validation and test samples identically
        samples = samples.reshape((len(samples), -1))
        alpha = 1 - confidence_level
        n_bootstrap = int(round(10/(alpha/2) + 1))
        # Resample over all available values (dataset splits x trials)
        bidx = np.random.randint(0, samples.shape[1],
                                 (n_bootstrap, samples.shape[1]))
        b_samples = samples[:, bidx]
        b_means = np.mean(b_samples, axis=2)
        ci = np.quantile(b_means, [alpha/2, 1-alpha/2], axis=1).transpose()
        return ci
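

if __name__ == '__main__':
    # Minimal usage sketch, assuming the FCNet tabular benchmark data files
    # (e.g. fcnet_protein_structure_data.hdf5) have been downloaded to
    # ./fcnet_tabular_benchmarks/ and that the helper and benchmark_analyzer
    # modules from this repository are on the path. Any extra keyword
    # arguments are forwarded to Benchmark.__init__ in benchmark_analyzer.py.
    analyzer = FCNetBenchmark(dataset='protein_structure',
                              confidence_interval='student-t',
                              confidence_level=0.95)
    print('Runtime matrix shape: {}'.format(analyzer.runtimes.shape))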