-
Notifications
You must be signed in to change notification settings - Fork 0
/
benchmark_analyzer.py
2205 lines (2003 loc) · 101 KB
/
benchmark_analyzer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import time
import itertools
import copy
from multiprocessing import Pool
from numbers import Number
import tqdm
import numpy as np
import pandas as pd
import scipy
from scipy.spatial import ConvexHull
from scipy.optimize import linprog
from scipy.stats import spearmanr
import altair as alt
import streamlit as st
import ConfigSpace
from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.configuration_space import Configuration
from ConfigSpace.hyperparameters import CategoricalHyperparameter
from ConfigSpace.hyperparameters import OrdinalHyperparameter
from ConfigSpace.hyperparameters import UniformFloatHyperparameter
from fanova import fANOVA
from heap import Heap
import helper
from helper import Progress
class Benchmark:
def __init__(self,
X,
y_samples,
hyperparameters,
config_space,
confidence_level=0.95,
y_bounds=None,
scenario_name='benchmark',
permute_objective=False,
seed=None,
y=None):
"""__init__
Parameters
----------
X: numpy.ndarray
A numpy array containing the grid of evaluated configurations. Each
row is a configuration and each column is a hyper-parameter. The
order of the hyperparameter columns must match the list of names of
the hyperparameters in hyperparameters.
y_samples: numpy.ndarray
The set of performance measurements (samples) for each
configuration contained in X. The number of rows must match
X, and the entries must be in the same order. Samples within
each row should be IID, and each samples within each column
should block on the same features (e.g., training/validiation
sets).
hyperparameters: list of str
The list of hyperparameter names as they appear in the grid X.
Must be of length equal to X.shape[1].
config_space: ConfigSpace.configuration_space.ConfigurationSpace
The configuration space of the algorithm being analyzed. Currently
only supports categorical and ordinal parameters. Does not
support conditional parameters. Integer- or real-valued parameters
should be converted to ordinal parameters whose values match the
entries in the configurations grid X.
y_bounds: numpy.ndarray
The confidence intervals for the performance of the configurations.
Must contain the same number of rows and X, and must contain
exacty two columns, in the order: [lower bound, upper bound].
Row entries must be in the same order as the entries in X.
If None, the bounds will be determined for you automatically by
calculating student-t-based confidence intervals with the
specified significance level.
scenario_name : str (optional)
The name of the scenario being analyzed.
permute_objective : bool
If True, the association between the rows of X and y is broken
by randomly shuffling the rows of y. This allows you to verify that
the confidence intervals are not too large and/or the configuration
space is not too connected to be able to reject uni-modality of the
landscape for a randomly permuted objective function.
seed : int | None
The random seed used by the benchmark analyzer.
y : numpy.ndarray | None
The best point-estimate for the loss of the configurations. If
None, it will be calculated for you as the mean of y_samples.
"""
# Assume one until we can prove otherwise
self._num_modes = 1
self._permute_objective = permute_objective
self._validate_config_space(config_space)
self.config_space = config_space
if y is None:
y = np.mean(y_samples, axis=1)
if y_bounds is None:
y_bounds = helper.confidence_interval_student(y_samples,
confidence_level)
# @TODO: Validate X, y_samples, y_bounds and hyperparameters.
self._random = np.random.RandomState(seed)
self._global_minima = None
self._dataset = scenario_name
self.X = X
self.y = y
self.y_bounds = y_bounds
self.y_samples = y_samples
self.hyperparameters = hyperparameters
self.confidence_level = confidence_level
self.fANOVA = None
n_unique = len(pd.DataFrame(X).drop_duplicates())
n_original = len(X)
if n_unique != n_original:
raise ValueError('The configuration dataframe, X, contains {} '
'duplicatated configurations.'
''.format(n_original - n_unique))
self._initialize_configuration_table(X,
y,
y_samples,
y_bounds,
hyperparameters)
def _validate_config_space(self, config_space):
if not (isinstance(config_space, ConfigurationSpace) and
len(config_space.get_conditions()) == 0):
raise NotImplementedError('Configuration Spaces with conditional hyperparameters '
'are not currently supported.')
for hp in config_space.get_hyperparameters():
if not (isinstance(hp, OrdinalHyperparameter)
or isinstance(hp, CategoricalHyperparameter)):
raise NotImplementedError('Only Categorical and Ordinal Hyperparameters are '
'supported at the moment. '
'Provided {}. '.format(type(hp)))
def _initialize_configuration_table(self,
X,
y,
y_samples,
y_bounds,
hyperparameters):
"""
Creates a pandas dataframe with an entry for every valid configuration
in self.config_space.
"""
config_ids = []
configurations = []
progress = Progress(len(X),
'{}% done making the configuration table')
for values in X:
progress.update()
values = dict(zip(hyperparameters, values))
config = Configuration(self.config_space, values=values)
config_id = self.get_unique_configuration_identifier(config)
config_ids.append(config_id)
configurations.append(config)
# Randomly permute the objective function confidence intervals if necessary
if self._permute_objective:
state = self._random.get_state()
self._random.shuffle(y)
self._random.set_state(state)
self._random.shuffle(y_bounds)
self._random.set_state(state)
self._random.shuffle(y_samples)
# Note that Quality in this table is the minimum solution quality necessary
# to reach the configuration from the global optimum. Not the estimated quality.
self._configuration_table = pd.DataFrame({'Configuration ID': config_ids,
'Configuration': configurations,
'Visited': False,
'Parent': None,
'Quality': float('inf'),
'Estimated Objective': y,
'Lower Bound': y_bounds[:,0],
'Upper Bound': y_bounds[:,1]})
progress = Progress(len(X),
'{}% done extracting the sample values')
self._confidence_interval \
= dict(zip(np.array(self._configuration_table['Configuration ID']),
y_bounds))
self._objective \
= dict(zip(np.array(self._configuration_table['Configuration ID']),
y))
self._samples \
= dict(zip(np.array(self._configuration_table['Configuration ID']),
y_samples))
self._mark_all_unreachable()
self._stale_table = False
def get_confidence_interval(self,
config):
config_id = self.get_unique_configuration_identifier(config)
return self._confidence_interval[config_id]
def get_neighbours(self,
config):
"""get_neighbours
Given a configuration, returns its list of neighbours. The
neighbourhood of a configuration is defined by mutating individual
parameters of the configuration. Ordinal parameter values can be
increased or decreased by one unit, and categorical parameter values
can be changed to any other value.
Parameters
----------
config : ConfigSpace.configuration_space.Configuration
The configuration whose neighbours you are getting.
Returns
-------
neighbours : list of ConfigSpace.configuration_space.Configuration
The neighbours of config.
"""
neighbours = []
for hp in self.config_space.get_hyperparameters():
if isinstance(hp, OrdinalHyperparameter):
# Ordinal parameters may move up or down by one index
values = []
idx = list(hp.sequence).index(config[hp.name])
if idx-1 in range(len(hp.sequence)):
values.append(hp.sequence[idx-1])
if idx+1 in range(len(hp.sequence)):
values.append(hp.sequence[idx+1])
elif isinstance(hp, CategoricalHyperparameter):
# Any single categorical value may be swapped with another
values = list(hp.choices)
values.remove(config[hp.name])
else:
raise NotImplementedError('Only Categorical and Ordinal Hyperparameters are '
'supported by get_neighbours() at the moment. '
'Provided {}. '.format(type(hp)))
for value in values:
neighbour = copy.deepcopy(config)
neighbour[hp.name] = value
neighbours.append(neighbour)
return neighbours
def get_unique_configuration_identifier(self,
config):
"""get_unique_configuration_identifier
Outputs a string representation of the dictionary that contains this configuration.
However, the dictionary keys are sorted to ensure that the representation is unique.
Parameters
----------
config : Configuration
The configuration to convert to a unique dictionary
Returns
-------
s : str
The unique string representation of config
"""
if not (isinstance(config, Configuration) or config is None):
raise ValueError('Expected config to be a Configuration or None. '
'Provided {}.'.format(config))
if config is None:
return 'None'
d = config.get_dictionary()
s = []
for key in sorted(d.keys()):
value = d[key]
if isinstance(value, str):
value = "'{}'".format(value)
elif isinstance(value, Number):
if float(value).is_integer():
value = int(value)
else:
value = float(value)
s .append("'{}': {}".format(key,
value))
return '{' + ', '.join(s) + '}'
def test_reject_unimodality(self, stop_early=False, verbose=True):
"""
Checks to see if the current benchmark is unimodal by applying
Dijkstra's algorithm on the configuration space graph defined
implicitly by get_neighbours(). If, starting from the configuration
with the global minima lower bound, it is possible to construct a tree
with monotically increasing possible objective function values
(according to the confidence intervals) then it is not possible to
reject the possibility that the benchmark is unimodal.
We construct this tree using Dijkstra's algorithm, where the distance
is the solution quality and an edge exists between a source
configuration and a destination configuration if minimum solution
quality needed to reach the source is not larger than the upper bound
for the confidence interval of the destination. The distance for
reaching the destination from the source is the maximum of the source's
distance and the lower bound of the confidence interval for the
destination. The search process starts at the global minima for the
lower bounds of the confidence intervals. If the resulting tree does not
span the entire (discretized) configuration state space, then we can
conclude that it is not uni-modal.
Parameters
----------
stop_early : bool
If stop_early is True, then the search with terminate as soon as it
can be concluded that all configurations are reachable from the
global optimum using some path. Otherwise, the search process
continues until the shortest path to each configuration has been
found.
verbose : bool
If True, progress about the current state of the search will be
printed to the console.
Returns
-------
unimodal : bool
True if the benchmark is significantly non-unimodal, otherwise
False.
"""
heap = Heap()
self._mark_all_unvisited()
config = self.get_smallest_unvisited_minima()
config_id = self.get_unique_configuration_identifier(config)
ci = self.get_confidence_interval(config)
self._mark_reachable(config)
heap.push(ci[0], config_id, (config, None))
iteration = 0
while len(heap) > 0:
quality, config_id, (config, parent) = heap.pop()
self._mark_visited(config, quality, parent)
iteration += 1
neighbours = self.get_neighbours(config)
if verbose:
if iteration % 1000 == 0 or len(heap) == 0:
print('Iteration: {0}; Heap size: {1}; Still Unreached: {4}; Neighbours: {2:02d}; Quality: {3:.5f}'
''.format(iteration, len(heap), len(neighbours),
quality, self._still_unreachable), end='\n', flush=True)
elif iteration % 100 == 0 and False:
print('Iteration: {0}; Heap size: {1}; Still Unreached: {4} Neighbours: {2:02d}; Quality: {3:.5f}'
''.format(iteration, len(heap), len(neighbours),
quality, self._still_unreachable), end='\r', flush=True)
for neighbour in neighbours:
if self._was_visited(neighbour):
continue
ci = self.get_confidence_interval(neighbour)
if quality <= ci[1]:
self._mark_reachable(neighbour)
heap.push_if_better(max(quality, ci[0]),
self.get_unique_configuration_identifier(neighbour),
(neighbour, config))
if stop_early:
# Check to see if everything is reachable, if it is, we don't actually have to
# continue the search to conclude that there are no more modes. However, continuing
# the search does have the advantage that we can determine which configurations are
# uniquely reachable from each mode.
if self._still_unreachable == 0:
break
all_visited = self._all_visited()
if verbose:
print('{0:.2f}% of the state space was visited during the search procedure.'
''.format(self._percent_visited))
print('{} of the configurations were determined to remain unreachable.'
''.format(self._still_unreachable))
if self._still_unreachable > 0:
reject = 'reject'
else:
reject = 'cannot reject'
# @TODO: confidence_interval is never initialized in this class. Right
# now we're relying on the existence of a definition for the
# confidence level that comes from the child class
print('Therefore, we {} the possibility that this benchmark has {} mode{} or less '
'with {}% confidence.'.format(reject,
self._num_modes,
'' if self._num_modes == 1 else 's',
self.confidence_level*100), flush=True)
#self._configuration_table.to_csv('debug.csv')
return self._still_unreachable > 0
def count_modes(self, verbose=True):
"""count_modes
Count the number of modes in the landscape by iteratively applying
test_reject_unimodality until all configurations have been reached at
least once.
Parameters
----------
verbose : bool
If True, information about the current progress of the search will
be printed to the console.
Returns
-------
num_modes : int
The minimum number of nodes that can be proved must exist given the
confidence intervals provided.
"""
self._num_modes = 1
self._mark_all_unvisited()
self._mark_all_unreachable()
not_all_visited = True
while not_all_visited:
# The following function actually runs Dijkstra's algorithm from the smallest unvisited
# point, so calling it multiple times with increasing values to self._num_modes will
# effectively count the number of modes
not_all_visited = self.test_reject_unimodality()
if not_all_visited:
self._num_modes += 1
return self._num_modes
def _mark_all_unvisited(self):
self._stale_table = True
config_ids = np.array(self._configuration_table['Configuration ID'])
self._visited = dict(zip(config_ids,
np.full(len(config_ids), False)))
self._quality = dict(zip(config_ids,
np.full(len(config_ids), float('inf'))))
self._parent = dict(zip(config_ids,
np.full(len(config_ids), None)))
def _mark_all_unreachable(self):
config_ids = np.array(self._configuration_table['Configuration ID'])
self._reachable = dict(zip(config_ids,
np.full(len(config_ids), False)))
self._still_unreachable = len(config_ids)
def _mark_visited(self,
config,
quality,
parent):
self._stale_table = True
config_id = self.get_unique_configuration_identifier(config)
self._visited[config_id] = True
self._quality[config_id] = quality
self._parent[config_id] = self.get_unique_configuration_identifier(parent)
def _mark_reachable(self,
config):
# A less strict version of visited. Technically, as soon as every
# configuration has been reached, we know that the space is uni-modal.
# Therefore, we don't need to go until we've visited every single configuration.
self._stale_table = True
config_id = self.get_unique_configuration_identifier(config)
if not self._reachable[config_id]:
# Only decrement the counter if we haven't been able to reach this configuration before
self._still_unreachable -= 1
self._reachable[config_id] = True
def _was_visited(self,
config):
config_id = self.get_unique_configuration_identifier(config)
return self._visited[config_id]
def _get_all_reachable(self):
return self._still_unreachable == 0
def _get_visited_at_least_once(self):
if self._stale_table:
self._update_table()
visited = self._configuration_table[['Visited from mode {}'
''.format(i+1) for i in range(self._num_modes)]]
visited = np.any(np.array(visited), axis=1)
return visited
def _all_visited(self):
if self._stale_table:
self._update_table()
visited = self._get_visited_at_least_once()
num_visited = np.sum(visited)
num_total = len(self._configuration_table)
self._percent_visited = num_visited/num_total*100
return num_visited == num_total
def get_unique_reachability_sizes(self):
# We define the unique reachability set of a mode as the set of configuration that can
# only be reached from within that mode while moving upwards.
visit_columns = ['Visited from mode {}'.format(i+1) for i in range(self._num_modes)]
visited = np.array(self._configuration_table[visit_columns])
# Get the index of all configurations which can only be reach from a single mode
idx = np.sum(visited, axis=1) == 1
# And then count them for each mode.
return np.array(self._configuration_table[idx][visit_columns].sum())
def _update_table(self):
self._stale_table = False
visited = []
quality = []
parent = []
reachable = []
for config_id in self._configuration_table['Configuration ID']:
visited.append(self._visited[config_id])
quality.append(self._quality[config_id])
parent.append(self._parent[config_id])
reachable.append(self._reachable[config_id])
self._configuration_table['Visited from mode {}'.format(self._num_modes)] = visited
self._configuration_table['Quality from mode {}'.format(self._num_modes)] = quality
self._configuration_table['Parent from mode {}'.format(self._num_modes)] = parent
self._configuration_table['Reachable'] = reachable
def get_smallest_unvisited_minima(self):
# It can be proved, that if the entire configuration space is not reachable from the
# configuration with the smallest upper bound, then there does not exist any point
# with a reachability set that contains the entire configuration space.
# Hint: Prove by contradiction.
visited = self._get_visited_at_least_once()
df = self._configuration_table[~visited]
indexer = df['Upper Bound'] == df['Upper Bound'].min()
smallest = df[indexer].iloc[0]['Configuration']
if self._num_modes == 1:
self._global_minima = smallest
return smallest
def get_score(self, configuration, drop_probability=0):
if self._random.rand() < drop_probability:
return float('inf')
else:
config_id = self.get_unique_configuration_identifier(configuration)
return self._objective[config_id]
def optimize_hyperparameters(self, config, drop_probability=0, iterations=10, update='Multi-Incumbent'):
"""optimize_hyperparameters
A simplistic approach to optimizing the hyper-parameters of an
algorithm. It merely performs a 1-dimensional optimization of each
parameter independently and then simultaneously updates all of them.
This process is embarassingly parallel and can be repeated until
convergence.
Parameters
----------
config : Configuration
The initial starting configuration for the search
drop_probability : float
A float in [0, 1). The probability that configurations are
randomly dropped in each iteration of the method. If this were
applied with the evaluation of each configuration in parallel, it
may be optimal to end each iteration after a sufficiently large
percentage of the configurations have been evaluated and then
to treat anything left un-evaluated as censored, rather than
possibly blocking and waiting for a long time for those final few
configurations to finish. By specifing the probability that a
configuration will be dropped in each iteration we can simulate
this effect. Note: We assume that the quality of each configuration
is independent of its probability of being slowest and therefore
dropped, which is probably an over-simplification.
iterations : int
A positive integer. The number of iterations of the method to
perform.
Returns
-------
anytime_performance : list of float
The performance of the incumbent configuration at each iteration.
anytime_incumbent : list of Configuration
The list of anytime incumbents for each iteration.
centroid_performance : list of float
The performance of the centroid configuration at each iteration.
anytime_centroid : list of Configuration
The list of centroids used at each iteration.
"""
centroid = config
incumbent = copy.deepcopy(config)
incumbent_score = self.get_score(config)
anytime_performance = []
anytime_incumbent = []
centroid_performance = []
anytime_centroid = []
anytime_incumbent.append(incumbent)
anytime_performance.append(incumbent_score)
for i in range(iterations):
score = self.get_score(centroid)
anytime_centroid.append(centroid)
centroid_performance.append(score)
next_centroid = copy.deepcopy(centroid)
# In practice this entire loop and the one within it would be parallelized
# completely.
for hp in self.config_space.get_hyperparameters():
if isinstance(hp, OrdinalHyperparameter):
values = list(hp.sequence)
elif isinstance(hp, CategoricalHyperparameter):
# Any single categorical value may be swapped with another
values = list(hp.choices)
else:
raise NotImplementedError('Only Categorical and Ordinal '
'Hyperparameters are supported '
'at the moment. '
'Provided {}. '.format(type(hp)))
best_score = float('inf')
best = None
# Break ties uniformly at random.
self._random.shuffle(values)
for value in values:
challenger = copy.deepcopy(centroid)
challenger[hp.name] = value
score = self.get_score(challenger, drop_probability)
# Check if this is better than the current best for this slice
if score <= best_score:
best_score = score
best = copy.deepcopy(challenger)
# Check if this is better than all confiugrations so far
if score <= incumbent_score:
incumbent_score = score
incumbent = challenger
# Update the centroid
next_centroid[hp.name] = best[hp.name]
if update == 'Single-Incumbent':
centroid = incumbent
else:
centroid = next_centroid
anytime_incumbent.append(incumbent)
anytime_performance.append(incumbent_score)
return anytime_performance, anytime_incumbent, \
centroid_performance, anytime_centroid
def optimize_hyperparameters_randomly(self, config, drop_probability=0, iterations=10):
"""optimize_hyperparameters_randomly
Performs the same amount of work as it's non-random counterpart, but
instead of doing anything intelligent within each iteration it just
randomly samples configurations.
Parameters
----------
config : Configuration
The initial starting configuration for the search
drop_probability : float
A float in [0, 1). The probability that configurations are
randomly dropped in each iteration of the method. If this were
applied with the evaluation of each configuration in parallel, it
may be optimal to end each iteration after a sufficiently large
percentage of the configurations have been evaluated and then
to treat anything left un-evaluated as censored, rather than
possibly blocking and waiting for a long time for those final few
configurations to finish. By specifing the probability that a
configuration will be dropped in each iteration we can simulate
this effect. Note: We assume that the quality of each configuration
is independent of its probability of being slowest and therefore
dropped, which is probably an over-simplification.
iterations : int
A positive integer. The number of iterations of the method to
perform.
Returns
-------
anytime_performance : list of float
The performance of the incumbent configuration at each iteration.
anytime_incumbent : list of Configuration
The list of anytime incumbents for each iteration.
"""
incumbent = copy.deepcopy(config)
incumbent_score = self.get_score(config)
anytime_performance = []
anytime_incumbent = []
anytime_incumbent.append(incumbent)
anytime_performance.append(incumbent_score)
for i in range(iterations):
# In practice this entire loop and the one within it would be parallelized
# completely.
for hp in self.config_space.get_hyperparameters():
if isinstance(hp, OrdinalHyperparameter):
values = list(hp.sequence)
elif isinstance(hp, CategoricalHyperparameter):
# Any single categorical value may be swapped with another
values = list(hp.choices)
else:
raise NotImplementedError('Only Categorical and Ordinal '
'Hyperparameters are supported '
'at the moment. '
'Provided {}. '.format(type(hp)))
for value in values:
challenger = self.config_space.sample_configuration()
score = self.get_score(challenger, drop_probability)
# Check if this is better than all confiugrations so far
if score <= incumbent_score:
incumbent_score = score
incumbent = challenger
anytime_incumbent.append(incumbent)
anytime_performance.append(incumbent_score)
return anytime_performance, anytime_incumbent
def _embed_y(self, X, y_samples):
"""_embed_y
Embeds the configurations into a matrix with dimensions equal to the
number of hyperparmaeters.
Parameters
----------
X : np.ndarray
A 2D array containing all of the configurations, one configuration
per row.
y_samples : np.ndarray
A 1 or 2D array containing samples of objective function
performance scores. Rows correspond to the configurations of X.
Returns
-------
embedded_y : np.ndarray
The configuration performance estimates embedded in the dimensions
of the hyperparameter configurations. All numerical parameters are
encoded such that adjacent values are adjacent in the embedding
space. The last dimensions of embedded_y contains the samples of y.
value_lists : list of lists
Each of the first n-1 dimensions of embedded_y correspond to a
particular hyperparameter. Each list in value_lists corresponds
to the matching parameter, and the order of the parameter values
as they appear in the embedded space is provided by the
corresponding list.
dimensions : tuple of ints
The shape of embedded_y, excluding the last dimension.
"""
X = np.array(copy.deepcopy(X))
y = np.array(copy.deepcopy(y_samples))
if len(y.shape) == 1:
y = np.reshape(y, (len(y), 1))
def add_columns(data, col_name):
cols = []
for i in range(data.shape[1]):
df[col_name.format(i)] = data[:,i]
cols.append(col_name.format(i))
return cols
df = pd.DataFrame()
x_cols = add_columns(X, 'X_{}')
y_cols = add_columns(y, 'y_{}')
df = df.sort_values(by=x_cols)
X = np.array(df[x_cols])
y = np.array(df[y_cols])
dimensions = tuple(df[x_cols].nunique())
# We now want to reshape y so that the index of each of the dimensions
# (except the last one) provide the indices for the respective values
# of the parameters. That is, the relative position of each of the
# y_samples is the same as if the y_values were in an n-dimesional
# grid with unit length between each of their parameter values.
embedded_y = y.reshape(dimensions + (-1,))
# The index of a set of samples in embedded_y indexes the parameter
# values as found in value_lists
value_lists = []
for x_col in x_cols:
values = sorted(list(set(list(df[x_col]))))
value_lists.append(values)
return embedded_y, value_lists, dimensions
def _encode_categoricals_as_ints(self, y, value_lists, dimensions):
"""encode_categoricals_as_ints
Since we're calculating partial derivatives with finite differences,
we can generalize this to be applied to the categorical variables
for all four-cycles of configurations. The only piece that is really
missing is the natural notion of distance between two configurations
when categorical variables are changing. We heuristically assign all
such distances a value of 1. In practice, this doesn't actually affect
our tests for significance anyways, since these distances end up just
being constant factors that don't affect tests for significance of
a difference from zero. However, it does change the absoluate
magnitude of the partial derivative values.
To encode categoricals as integers, we need to create multiple copies
of each value of the categorical so that every combination of values
appears adjacent to each other. For simplicity, we create all such
pairs and then just concatenate together duplicate slices of the data
for each set of pairs. This is a little bit less efficient than
strictly necessary, since we only actually care about the partial
values that are computed for every second pair (the rest will all be
duplicates), but it allows for simple code design and hence reduces
the risk of implementation errors.
Parameters
----------
y : numpy.ndarray
The pre-embedded array of y value samples (see _embed_y)
value_lists : list of lists
The values that correspond to the dimensions of y
dimensions : tuple
The size of the dimensions of y (excluding the last dimension,
which contains all of the samples of y).
Returns
-------
encoded_y : numpy.ndarray
The objective function samples with the categorical axes
encoded as integers
value_lists : list of lists
The updated list of values for each dimension, using the
new encoding space for the categorical parameters.
dimensions : tuple
The new dimension sizes of the encoding space
encodings : dict of list of tuples
The encodings that were used for the categorical parameters
The dict keys correspond to the axis of the categorical
hyperparameters in the encoding space. The list of tuples
is a list of pairs, where each pair corresponds to one of the
pairs of categorical values with which we are interested in
computing the partial derivative. The pairs contain the
original values of the categorical parameter. The position
of the values in the list of tuples when flattened corresponds
to the position of those values in the encoding space.
"""
# So that we can modify it
dimensions = list(dimensions)
# The map that let's use get back the original encoding space
encodings = {}
for axis in range(len(self.hyperparameters)):
hp_name = self.hyperparameters[axis]
hp = self.config_space.get_hyperparameter(hp_name)
if isinstance(hp, CategoricalHyperparameter):
choices = hp.choices
positions = list(range(len(choices)))
value_pairs = list(itertools.combinations(choices, 2))
position_pairs = list(itertools.combinations(positions, 2))
# Flatten
position_pairs = [p for pair in position_pairs for p in pair]
# Create the duplicates along the axis
y = y.take(position_pairs, axis=axis)
# To get a distance of 1 between each categorical encoding we
# replace the "values" with a list of integers
value_lists[axis] = list(range(len(position_pairs)))
# Update the size of this dimension
dimensions[axis] = len(position_pairs)
encodings[axis] = value_pairs
dimensions = tuple(dimensions)
return y, value_lists, dimensions, encodings
def _decode_categoricals(self, interactions, encodings):
"""_decode_categoricals
Removes the duplicates of the categorical partial derivatives that
were by-products of encoding the categorical parameters as if they
are integers.
Parameters
----------
interactions : dict
A dict mapping tuples of hyperparameters indicies to grids of
partial derivatives with respect to the hyperparameter indicies.
encodings : dict of list of tuples
The dict keys correspond to the axis of the categorical
hyperparameters in the encoding space. The list of tuples
is a list of pairs, where each pair corresponds to one of the
pairs of categorical values with which we are interested in
computing the partial derivative. The pairs contain the
original values of the categorical parameter. The position
of the values in the list of tuples when flattened corresponds
to the position of those values in the encoding space.
Returns
-------
interactions : dict
A dict mapping tuples of hyperparameter indices to grids of
partial derivatives with respect to the hyperparameter indicies.
"""
def decode_categorical(interaction, p):
n = interaction.shape[p]
interaction = np.take(interaction, range(0, n, 2), axis=p)
return interaction
for hps in interactions:
interaction = interactions[hps]
for hp in hps:
if hp in encodings:
interaction = decode_categorical(interaction, hp)
interactions[hps] = interaction
return interactions
def test_reject_parameter_independence(self, order=2, distance_type='graphical', exclude=0):
"""test_reject_parameter_independence
Calculates the percentage of the landscape for which tuples of
hyperparameters of length order are independent from each other.
Technically, this is achieved by approximating the partial derivatives
with respect to the tuples of hyperparameters using the method of
finite differences. This is done for each sample value of the objective
function of the landscape. A two-sided t-test is performed to test for
a significant difference from zero for each such set of partial
derivative estimates.
Note that this can still be used for order=1, in which case the
'independence' is bet thought of as the fraction of the landscape
for which the value of the objective function is independent from
the individual hyperparameter values.
Parameters
----------
order : int
A positive integer that encodes the degree of the interactions
which will be returned.
distance_type : str
Choose what kind of distance space is used to calculate distances.
Options are 'cartesian', which normalizes the numerical parameters
to the range [0,1] and then uses these values as the distances; and
'graphical', which counts the minimal number of steps that would
need to be taken in a neighbourhood graph of the landscape.
exlude : float
The percentage of the landscape which you are excluding from the
analysis. Must be a number in [0,100). The worst exclude% of the
configuration space will be omitted from the analysis. If any one
configuration that is needed to calculate a partial derivative is
excluded, we will exclude that partial derivative.
Returns
-------
percent_dependent : dict of tuples to floats
A dict containing tuples of hyperparamater names and the
corresponding percentage of the landscape for which the two
parameters are dependent.
total_percent_dependent : float
The total percentage of the partial derivatives that are dependent
in the landscape.
partial_intervals : dict of tuples to np.ndarrays
A dict containing tuples of hyperparameter names and arrays
containing the upper and lower bounds on the partial derivatives.
"""
# Stuff I removed:
"""
absolute_partials : dict of tuples to floats
The mean absolute value of the partial derivatives for each tuple
of hyperparameters.
mean_absolute_partial : float
The mean absolute value of all of the partial derivatives
"""
#def get_mean_abs_partial(interaction):
# samples = interaction.reshape((-1))
# return np.mean(np.abs(samples))
def get_interaction_intervals(interaction):
samples = interaction.reshape((-1, interaction.shape[-1]))
intervals = helper.confidence_interval_student(samples, self.confidence_level)
return intervals
interactions = self._calculate_partials(order=order, distance_type=distance_type, exclude=exclude)
percent_dependent = {}
#absolute_partials = {}
partial_intervals = {}
total_percent_dependent = 0
total_n_samples = 0
#mean_absolute_partial = 0
for hps in interactions:
parsed_hps = tuple([self.hyperparameters[hp] for hp in hps])
dependent, n_samples = self._get_percent_dependent(interactions[hps])
#current_absolute_partial = get_mean_abs_partial(interactions[hps])
intervals = get_interaction_intervals(interactions[hps])
percent_dependent[parsed_hps] = dependent
#absolute_partials[parsed_hps] = current_absolute_partial
partial_intervals[parsed_hps] = intervals
total_percent_dependent += dependent
#mean_absolute_partial += current_absolute_partial
total_n_samples += 1
return percent_dependent, total_percent_dependent/total_n_samples, \
partial_intervals
#absolute_partials, mean_absolute_partial/total_n_samples, \
def _get_percent_dependent(self, interaction):
samples = interaction.reshape((-1, interaction.shape[-1]))
# nans may appear because we excluded part of the landscape by
# replacing their values with nans, now we just want to remove them
samples = samples[np.where(np.all(~np.isnan(samples), axis=1))[0]]
pvalues = scipy.stats.ttest_1samp(samples, 0, axis=1)[1]
# Note that scipy.stats.ttest_1samp returns nan values if the
# a sample contains nothing but zeros. In this case, all evidence
# clearly points to the fact that the parameters are independent,
# so that's what we want our output to be.
pvalues = np.nan_to_num(pvalues, nan=1)
dependent = pvalues <= 1 - self.confidence_level
return np.mean(dependent)*100, len(dependent)
def check_partial_objective_correlation(self, order=2, distance_type='graphical', exclude=0):
"""test_partial_objective_correlation
Calculates the rank correlation between the absolute value of the
partial derivative values and objective function values.
Technically, this is achieved by approximating the partial derivatives
with respect to the tuples of hyperparameters using the method of
finite differences. This is done for each sample value of the objective