import numpy as np
import numpy.linalg as LA


class AndersonAcceleration:
    """Anderson acceleration for fixed-point iteration.

    (Regularized) Anderson acceleration, also known as Approximate Maximal
    Polynomial Extrapolation (AMPE). The goal is to find a fixed point of
    some Lipschitz continuous function `g`, that is, to find an `x` such
    that `g(x) = x`. AMPE uses a window of previous iterates and residuals
    to solve for extrapolation coefficients, and then uses them to compute
    the next iterate. The parameters used in this implementation are those
    of [1].

    Parameters
    ----------
    window_size : int (optional, default=5)
        The number of previous iterates to use in the extrapolation. This is
        `m` in the algorithm.
    reg : float (optional, default=0)
        The L2 regularization parameter. This is `lambda` in the algorithm.
    mixing_param : float (optional, default=1)
        The mixing parameter. Must be between 0 and 1. This is `beta` in the
        algorithm.

    Attributes
    ----------
    x_hist_ : list
        History of the previous accelerated iterates.
        Maximum size = `window_size` + 1.
    gx_hist_ : list
        History of the previous function applications. These are the
        pre-acceleration iterates.
        Maximum size = `window_size` + 1.
    residuals_hist_ : list
        History of the previous residuals.
        Maximum size = `window_size` + 1.
    param_shape_ : tuple
        Shape of the parameters, defined when the first iterate is applied.

    References
    ----------
    [1] T. D. Nguyen, A. R. Balef, C. T. Dinh, N. H. Tran, D. T. Ngo,
        T. A. Le, and P. L. Vo, "Accelerating federated edge learning,"
        IEEE Communications Letters, 25(10):3282–3286, 2021.
    [2] D. Scieur, A. d'Aspremont, and F. Bach, "Regularized nonlinear
        acceleration," in Advances in Neural Information Processing Systems,
        2016.
    [3] A. d'Aspremont, D. Scieur, and A. Taylor, "Acceleration methods,"
        arXiv preprint arXiv:2101.09545, 2021.
    [4] "Anderson Acceleration," Stanford University Convex Optimization
        Group. https://github.com/cvxgrp/aa

    Examples
    --------
    >>> # x is a d-dimensional numpy array, produced by applying
    >>> # the function g to the previous iterate x_{t-1}
    >>> acc = AndersonAcceleration(reg=1e-8)
    >>> x_acc = acc.apply(x)  # accelerate from x
    """

    def __init__(self, window_size=5, reg=0, mixing_param=1.):
        window_size = int(window_size)
        assert window_size > 0, "Window size must be positive"
        self.window_size = window_size
assert reg >= 0, "Regularization parameter must be non-negative"
self.reg = reg
assert 0 <= mixing_param <= 1, "Mixing parameter must be between 0 and 1"
self.mixing_param = mixing_param
# History of function applications
self.gx_hist_ = []
# History of iterates
self.x_hist_ = []
# History of residuals
self.residuals_hist_ = []
# Shape of the parameters, defined when the first iterate is applied
self.param_shape_ = None

    def apply(self, x):
        """Perform acceleration on an iterate.

        Parameters
        ----------
        x : numpy array
            The iterate to accelerate. This is the application of `g` to the
            previous iterate.

        Returns
        -------
        x_acc : numpy array
            The accelerated iterate, of the same shape as `x`.
        """
        if not self.x_hist_:
            # First iteration, so no acceleration can be done
            self.x_hist_.append(x)
            self.param_shape_ = x.shape
            return x
        # Check the shape of the iterate
        assert x.shape == self.param_shape_, \
            "Iterate must have the same shape as previous iterates"
x_prev = self.x_hist_[-1]
residual = x - x_prev
self.residuals_hist_.append(residual)
if len(self.residuals_hist_) > self.window_size + 1:
self.residuals_hist_.pop(0)
self.gx_hist_.append(x)
if len(self.gx_hist_) > self.window_size + 1:
self.gx_hist_.pop(0)
        # Solve for alpha: minimize ||sum_i alpha_i F_{t-i}||^2 subject to
        # sum_i alpha_i = 1. With L2 regularization this reduces to solving
        # (F F^T + lambda * I) z = 1 and normalizing z to sum to one.
        # Residuals are flattened so the Gram matrix is well-defined for
        # parameters of any shape.
        Ft = np.stack([r.ravel() for r in self.residuals_hist_])  # (m_t + 1, dim)
        RR = Ft @ Ft.T
        RR += self.reg * np.eye(RR.shape[0])
        try:
            alpha = LA.solve(RR, np.ones(RR.shape[0]))
        except LA.LinAlgError:
            # Singular matrix, so solve a least-squares problem instead
            alpha = LA.lstsq(RR, np.ones(RR.shape[0]), rcond=None)[0]
        # Normalize alpha so the coefficients sum to one
        alpha /= alpha.sum() + 1e-12
        # Extrapolate to get the accelerated iterate (the early return above
        # guarantees the histories are non-empty here)
        x_acc = np.zeros_like(x, dtype=float)
        for alpha_i, x_i, gx_i in zip(alpha, self.x_hist_, self.gx_hist_):
            x_acc += (1 - self.mixing_param) * alpha_i * x_i
            x_acc += self.mixing_param * alpha_i * gx_i
        self.x_hist_.append(x_acc)
        if len(self.x_hist_) > self.window_size + 1:
            self.x_hist_.pop(0)
        return x_acc

    def reset_hist(self):
        """Empty the histories of iterates, function applications, and
        residuals.
        """
        self.x_hist_ = []
        self.gx_hist_ = []
        self.residuals_hist_ = []
        self.param_shape_ = None
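

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative, not part of the original module):
# accelerate the elementwise fixed-point iteration x_t = cos(x_{t-1}),
# whose fixed point in every coordinate is the Dottie number (~0.739085).
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    acc = AndersonAcceleration(window_size=5, reg=1e-8)
    x = np.array([0.0, 0.5, 1.0])  # arbitrary starting point
    for t in range(50):
        gx = np.cos(x)     # one application of g
        x = acc.apply(gx)  # accelerated iterate
        if LA.norm(np.cos(x) - x) < 1e-10:
            break
    print(f"fixed point after {t + 1} iterations: {x}")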