-
Notifications
You must be signed in to change notification settings - Fork 0
/
lqr.py
203 lines (183 loc) · 6.21 KB
/
lqr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
import numpy as np
def solve_riccati(A, B, Q, R):
    """Compute a one-step LQR feedback gain K such that u = K.dot(x).

    Performs a single backup of the discrete Riccati recursion with state
    cost Q acting as the value-function Hessian:
        K = -(R + B^T Q B)^{-1} B^T Q A

    Parameters
    ----------
    A : matrix
        State transition matrix, shape (n, n).
    B : matrix
        Control matrix, shape (n, m).
    Q : matrix
        State cost matrix, shape (n, n).
    R : matrix
        Control cost matrix, shape (m, m).

    Returns
    -------
    K : matrix
        Feedback gain, shape (m, n).
    """
    BTQ = B.T.dot(Q)
    # Solve the linear system directly instead of forming an explicit
    # inverse: np.linalg.solve is more numerically stable and cheaper.
    return -np.linalg.solve(R + BTQ.dot(B), BTQ.dot(A))
class LQRLike(object):
    """Abstract interface for LQR-style controllers.

    Subclasses provide the dynamics model (and optionally a state
    preprocessor and a policy); this base class supplies numerical
    linearization of the dynamics via central finite differences.
    """

    def dynamics(self, x, u):
        """Advance the system one step from state x under control u."""
        raise NotImplementedError

    def preprocess(self, x):
        """Normalize/clean up a raw state produced by the dynamics."""
        raise NotImplementedError

    def policy(self, x, i):
        """Return the control to apply in state x at step index i."""
        raise NotImplementedError

    def linear_dynamics_matrices(self, x, u, eps=1e-5):
        """Estimate the Jacobians A = d(dynamics)/dx and B = d(dynamics)/du.

        Uses symmetric (central) finite differences with step size eps,
        one perturbed coordinate at a time. Returns (A, B) where A has
        shape (n, n) and B has shape (n, m).
        """
        def jac_column(step, base, j):
            # Central difference along coordinate j of `base`.
            plus = base.copy()
            plus[j] += eps
            minus = base.copy()
            minus[j] -= eps
            return (step(plus) - step(minus)) / (2.0 * eps)

        a_cols = [
            jac_column(lambda v: self.dynamics(v, u), x, j)
            for j in range(x.shape[0])
        ]
        b_cols = [
            jac_column(lambda v: self.dynamics(x, v), u, j)
            for j in range(u.shape[0])
        ]
        return np.stack(a_cols, axis=1), np.stack(b_cols, axis=1)
class LQRStabilizer(LQRLike):
    """LQR controller that stabilizes the system around a fixed point.

    The dynamics are linearized once at the operating point (x, u) and a
    single Riccati backup yields a constant gain K, applied as u = K.dot(x).
    Call get_controls() before policy().
    """

    def __init__(self, Q, R, x, u):
        # Quadratic cost terms.
        self.Q = Q
        self.R = R
        # Point about which the dynamics will be linearized.
        self._operating_x = x
        self._operating_u = u
        # Linearized dynamics matrices; filled in by get_controls().
        self.A = None
        self.B = None

    def get_controls(self):
        """Linearize at the operating point and compute the gain self.K."""
        op_x, op_u = self._operating_x, self._operating_u
        self.A, self.B = self.linear_dynamics_matrices(op_x, op_u)
        self.K = solve_riccati(self.A, self.B, self.Q, self.R)

    def policy(self, x, i):
        """Return the stabilizing control for state x (step index i unused)."""
        return self.K.dot(x)
class AnalyticILQR(LQRLike):
    """
    Generic class for iLQR solvers.
    Usage:
    Make subclass of AnalyticILQR and provide implementations of:
    * dynamics() : The dynamics model
    By default, linearizations are computed using the method of finite differences.
    If you don't like this, you can override them with the method
    linear_dynamics_matrices : x, u -> A, B
    Call forward() to generate a rollout with a given set of controls
    Call backward() to generate gains to adjust controls
    """
    def __init__(self, Q, R, Qf=None, alpha=0, grad_eps=1e-6):
        """
        Initialize with Q and R matrices to define a linear quadratic cost function:
        l(x, u) = x^T . Q . x + u^T . R . u
        Parameters
        ----------
        Q : matrix
            The cost matrix to penalize state
        R : matrix
            The cost matrix to penalize controls
        Qf : matrix (optional)
            The cost matrix to penalize the final state.
            If None, Qf <- Q.
            Default is None.
        alpha : float (optional)
            Regularization parameter to penalize large deviations from targets.
            Default is 0.
        grad_eps : float (optional)
            The epsilon value for finite difference gradient computations.
            Default is 1e-6.
        """
        super(AnalyticILQR, self).__init__()
        self.Q = Q
        self.R = R
        # Fall back to the running state cost for the terminal cost.
        self.Qf = Q if Qf is None else Qf
        self.alpha = alpha
        self._grad_eps = grad_eps

    def linear_dynamics_matrices(self, x, u):
        """
        Compute the first order Taylor approximation matrices for the dynamics:
        dx_{t+1} ~= A(dx_t) + B(du_t)
        Should return A, B
        """
        # Delegates to the base-class finite-difference Jacobians, pinning
        # the step size to the grad_eps chosen at construction.
        return super(AnalyticILQR, self).linear_dynamics_matrices(x, u, self._grad_eps)

    def dynamics(self, x, u):
        """
        Compute the next state of the system given a state and a control
        Note: we must discretize time
        """
        raise NotImplementedError

    def forward(self, x, us, target=None):
        """
        Compute a rollout starting in a given state and using a given set of controls
        Parameters
        ----------
        x : vector
            The initial state
        us : list of vector
            The set of controls
        target : vector (optional)
            The target state, used to measure loss of rollout.
            If None, measured loss will be 0.
            Default: None
        Returns
        -------
        xs : list of vector
            The states of the trajectory generated by the rollout
        loss : float
            The loss incurred by the rollout
        """
        xs = [x.copy()]
        loss = 0
        Q = self.Q
        R = self.R
        Qf = self.Qf
        for u in us:
            # Step the dynamics, then normalize the resulting state.
            # NOTE(review): requires preprocess() to be implemented by the
            # subclass — the base-class version raises NotImplementedError.
            x = self.preprocess(self.dynamics(x, u))
            if target is not None:
                delta = x - target
                # Running quadratic cost on state deviation and control effort.
                loss += delta.T.dot(Q).dot(delta) + u.T.dot(R).dot(u)
            xs.append(x.copy())
        if target is not None:
            delta = x - target
            # Terminal cost on the final state of the rollout.
            loss += delta.T.dot(Qf).dot(delta)
        return xs, loss

    def backward(self, xs, us, target, verbose=False):
        """
        Compute iLQR backup
        Parameters
        ----------
        xs : list of vector
            States in trajectory
        us : list of vector
            Controls in trajectory
        target : vector
            The target state
        verbose : bool (optional)
            Unused at the moment
        Return
        ------
        To compute change in controls du, we have:
        du = K(dx) + d
        Ks : list of matrix
            Feedback gains for each step
        ds : list of matrix
            Forcing gains for each step
        """
        Ks = [0 for _ in us]
        ds = [0 for _ in us]
        Q = self.Q
        R = self.R
        alpha = self.alpha
        Qf = self.Qf
        # Seed the value-function expansion from the terminal cost:
        # s is the gradient, S the Hessian, at the final state.
        s = Qf.dot(xs[-1] - target)
        S = Qf
        # alpha * I terms regularize the cost Hessians used in the backup.
        aIQ = alpha * np.eye(Q.shape[0])
        aIR = alpha * np.eye(R.shape[0])
        # Sweep backward over the trajectory (last control index first).
        for i in range(len(xs) - 2, -1, -1):
            x = xs[i]
            u = us[i]
            # Linearize the dynamics at this (state, control) pair.
            A, B = self.linear_dynamics_matrices(x, u)
            # First- and second-order expansion of the Q-function.
            Qx = (Q + aIQ).dot(x - 1*target) + s.T.dot(A)
            Qu = (R + aIR).dot(u) + s.T.dot(B)
            Qxx = Q + aIQ + A.T.dot(S).dot(A)
            Quu = R + aIR + B.T.dot(S).dot(B)
            Qux = B.T.dot(S).dot(A)
            Quu_inv = np.linalg.inv(Quu)
            # Gains for the control update du = K.dx + d.
            Ks[i] = -Quu_inv.dot(Qux)
            ds[i] = -Quu_inv.dot(Qu)
            KT = Ks[i].T
            # Propagate the value-function gradient and Hessian one step back.
            s = Qx + KT.dot(Quu).dot(ds[i]) + KT.dot(Qu) + Qux.T.dot(ds[i])
            S = Qxx + KT.dot(Quu).dot(Ks[i]) + KT.dot(Qux) + Qux.T.dot(Ks[i])
        return Ks, ds