-
Notifications
You must be signed in to change notification settings - Fork 0
/
lqr.py
203 lines (183 loc) · 6.21 KB
/
lqr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
import numpy as np
def solve_riccati(A, B, Q, R):
    """Compute a one-step LQR feedback gain K such that u = K.dot(x).

    Performs a single backup of the discrete Riccati recursion with state
    cost Q acting as the value-function Hessian:
        K = -(R + B^T Q B)^{-1} B^T Q A

    Parameters
    ----------
    A : matrix
        State transition matrix, shape (n, n).
    B : matrix
        Control matrix, shape (n, m).
    Q : matrix
        State cost matrix, shape (n, n).
    R : matrix
        Control cost matrix, shape (m, m).

    Returns
    -------
    K : matrix
        Feedback gain, shape (m, n).
    """
    BTQ = B.T.dot(Q)
    # Solve the linear system directly instead of forming an explicit
    # inverse: np.linalg.solve is more numerically stable and cheaper.
    return -np.linalg.solve(R + BTQ.dot(B), BTQ.dot(A))
class LQRLike(object):
    """Abstract interface for LQR-style controllers.

    Subclasses provide the dynamics model (and optionally a state
    preprocessor and a policy); this base class supplies numerical
    linearization of the dynamics via central finite differences.
    """

    def dynamics(self, x, u):
        """Advance the system one step from state x under control u."""
        raise NotImplementedError

    def preprocess(self, x):
        """Normalize/clean up a raw state produced by the dynamics."""
        raise NotImplementedError

    def policy(self, x, i):
        """Return the control to apply in state x at step index i."""
        raise NotImplementedError

    def linear_dynamics_matrices(self, x, u, eps=1e-5):
        """Estimate the Jacobians A = d(dynamics)/dx and B = d(dynamics)/du.

        Uses symmetric (central) finite differences with step size eps,
        one perturbed coordinate at a time. Returns (A, B) where A has
        shape (n, n) and B has shape (n, m).
        """
        def jac_column(step, base, j):
            # Central difference along coordinate j of `base`.
            plus = base.copy()
            plus[j] += eps
            minus = base.copy()
            minus[j] -= eps
            return (step(plus) - step(minus)) / (2.0 * eps)

        a_cols = [
            jac_column(lambda v: self.dynamics(v, u), x, j)
            for j in range(x.shape[0])
        ]
        b_cols = [
            jac_column(lambda v: self.dynamics(x, v), u, j)
            for j in range(u.shape[0])
        ]
        return np.stack(a_cols, axis=1), np.stack(b_cols, axis=1)
class LQRStabilizer(LQRLike):
    """LQR controller that stabilizes the system around a fixed point.

    The dynamics are linearized once at the operating point (x, u) and a
    single Riccati backup yields a constant gain K, applied as u = K.dot(x).
    Call get_controls() before policy().
    """

    def __init__(self, Q, R, x, u):
        # Quadratic cost terms.
        self.Q = Q
        self.R = R
        # Point about which the dynamics will be linearized.
        self._operating_x = x
        self._operating_u = u
        # Linearized dynamics matrices; filled in by get_controls().
        self.A = None
        self.B = None

    def get_controls(self):
        """Linearize at the operating point and compute the gain self.K."""
        op_x, op_u = self._operating_x, self._operating_u
        self.A, self.B = self.linear_dynamics_matrices(op_x, op_u)
        self.K = solve_riccati(self.A, self.B, self.Q, self.R)

    def policy(self, x, i):
        """Return the stabilizing control for state x (step index i unused)."""
        return self.K.dot(x)
class AnalyticILQR(LQRLike):
    """
    Generic class for iLQR solvers.
    Usage:
    Make subclass of AnalyticILQR and provide implementations of:
    * dynamics() : The dynamics model
    By default, linearizations are computed using the method of finite differences.
    If you don't like this, you can override them with the method
    linear_dynamics_matrices : x, u -> A, B
    Call forward() to generate a rollout with a given set of controls
    Call backward() to generate gains to adjust controls
    """
    def __init__(self, Q, R, Qf=None, alpha=0, grad_eps=1e-6):
        """
        Initialize with Q and R matrices to define a linear quadratic cost function:
        l(x, u) = x^T . Q . x + u^T . R . u
        Parameters
        ----------
        Q : matrix
            The cost matrix to penalize state
        R : matrix
            The cost matrix to penalize controls
        Qf : matrix (optional)
            The cost matrix to penalize the final state.
            If None, Qf <- Q.
            Default is None.
        alpha : float (optional)
            Regularization parameter to penalize large deviations from targets.
            Default is 0.
        grad_eps : float (optional)
            The epsilon value for finite difference gradient computations.
            Default is 1e-6.
        """
        super(AnalyticILQR, self).__init__()
        self.Q = Q
        self.R = R
        # Fall back to the running state cost for the terminal cost.
        self.Qf = Q if Qf is None else Qf
        self.alpha = alpha
        self._grad_eps = grad_eps

    def linear_dynamics_matrices(self, x, u):
        """
        Compute the first order Taylor approximation matrices for the dynamics:
        dx_{t+1} ~= A(dx_t) + B(du_t)
        Should return A, B
        """
        # Delegates to the base-class finite-difference Jacobians, pinning
        # the step size to the grad_eps chosen at construction.
        return super(AnalyticILQR, self).linear_dynamics_matrices(x, u, self._grad_eps)

    def dynamics(self, x, u):
        """
        Compute the next state of the system given a state and a control
        Note: we must discretize time
        """
        raise NotImplementedError

    def forward(self, x, us, target=None):
        """
        Compute a rollout starting in a given state and using a given set of controls
        Parameters
        ----------
        x : vector
            The initial state
        us : list of vector
            The set of controls
        target : vector (optional)
            The target state, used to measure loss of rollout.
            If None, measured loss will be 0.
            Default: None
        Returns
        -------
        xs : list of vector
            The states of the trajectory generated by the rollout
        loss : float
            The loss incurred by the rollout
        """
        xs = [x.copy()]
        loss = 0
        Q = self.Q
        R = self.R
        Qf = self.Qf
        for u in us:
            # Step the dynamics, then normalize the resulting state.
            # NOTE(review): requires preprocess() to be implemented by the
            # subclass — the base-class version raises NotImplementedError.
            x = self.preprocess(self.dynamics(x, u))
            if target is not None:
                delta = x - target
                # Running quadratic cost on state deviation and control effort.
                loss += delta.T.dot(Q).dot(delta) + u.T.dot(R).dot(u)
            xs.append(x.copy())
        if target is not None:
            delta = x - target
            # Terminal cost on the final state of the rollout.
            loss += delta.T.dot(Qf).dot(delta)
        return xs, loss

    def backward(self, xs, us, target, verbose=False):
        """
        Compute iLQR backup
        Parameters
        ----------
        xs : list of vector
            States in trajectory
        us : list of vector
            Controls in trajectory
        target : vector
            The target state
        verbose : bool (optional)
            Unused at the moment
        Return
        ------
        To compute change in controls du, we have:
        du = K(dx) + d
        Ks : list of matrix
            Feedback gains for each step
        ds : list of matrix
            Forcing gains for each step
        """
        Ks = [0 for _ in us]
        ds = [0 for _ in us]
        Q = self.Q
        R = self.R
        alpha = self.alpha
        Qf = self.Qf
        # Seed the value-function expansion from the terminal cost:
        # s is the gradient, S the Hessian, at the final state.
        s = Qf.dot(xs[-1] - target)
        S = Qf
        # alpha * I terms regularize the cost Hessians used in the backup.
        aIQ = alpha * np.eye(Q.shape[0])
        aIR = alpha * np.eye(R.shape[0])
        # Sweep backward over the trajectory (last control index first).
        for i in range(len(xs) - 2, -1, -1):
            x = xs[i]
            u = us[i]
            # Linearize the dynamics at this (state, control) pair.
            A, B = self.linear_dynamics_matrices(x, u)
            # First- and second-order expansion of the Q-function.
            Qx = (Q + aIQ).dot(x - 1*target) + s.T.dot(A)
            Qu = (R + aIR).dot(u) + s.T.dot(B)
            Qxx = Q + aIQ + A.T.dot(S).dot(A)
            Quu = R + aIR + B.T.dot(S).dot(B)
            Qux = B.T.dot(S).dot(A)
            Quu_inv = np.linalg.inv(Quu)
            # Gains for the control update du = K.dx + d.
            Ks[i] = -Quu_inv.dot(Qux)
            ds[i] = -Quu_inv.dot(Qu)
            KT = Ks[i].T
            # Propagate the value-function gradient and Hessian one step back.
            s = Qx + KT.dot(Quu).dot(ds[i]) + KT.dot(Qu) + Qux.T.dot(ds[i])
            S = Qxx + KT.dot(Quu).dot(Ks[i]) + KT.dot(Qux) + Qux.T.dot(Ks[i])
        return Ks, ds