[sysid] Fix adjusted R² calculation (#6101)
The old calculation hardcoded the number of explanatory variables p to 2 instead of deriving it from the number of regression coefficients.
calcmogul authored Dec 27, 2023
1 parent 5659038 commit f2c2bab
Showing 9 changed files with 147 additions and 112 deletions.
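For reference, adjusted R² is defined as R̅² = 1 − (1 − R²)(n − 1)/(n − p − 1), where n is the sample size and p is the number of explanatory variables; the old code's n − 3 denominator is only correct for p = 2. A hypothetical before/after (numbers invented for illustration, not taken from the commit): with n = 100 samples, R² = 0.95, and an arm model's p = 5 coefficients,

    old: R̅² = 1 − 0.05 × (99/97) ≈ 0.9490   (p hardcoded to 2)
    new: R̅² = 1 − 0.05 × (99/94) ≈ 0.9473   (p = 5)

so fits with more than two explanatory variables previously reported a slightly optimistic adjusted R².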
6 changes: 3 additions & 3 deletions sysid/src/main/native/cpp/analysis/AnalysisManager.cpp
@@ -518,9 +518,9 @@ AnalysisManager::FeedforwardGains AnalysisManager::CalculateFeedforward() {
   const auto& ff = sysid::CalculateFeedforwardGains(GetFilteredData(), m_type);
   FeedforwardGains ffGains = {ff, m_trackWidth};
 
-  const auto& Ks = std::get<0>(ff)[0];
-  const auto& Kv = std::get<0>(ff)[1];
-  const auto& Ka = std::get<0>(ff)[2];
+  const auto& Ks = ff.coeffs[0];
+  const auto& Kv = ff.coeffs[1];
+  const auto& Ka = ff.coeffs[2];
 
   if (Ka <= 0 || Kv < 0) {
     throw InvalidDataError(
25 changes: 13 additions & 12 deletions sysid/src/main/native/cpp/analysis/FeedforwardAnalysis.cpp
@@ -13,7 +13,7 @@
 #include "sysid/analysis/FilteringUtils.h"
 #include "sysid/analysis/OLS.h"
 
-using namespace sysid;
+namespace sysid {
 
 /**
  * Populates OLS data for (xₖ₊₁ − xₖ)/τ = αxₖ + βuₖ + γ sgn(xₖ).
@@ -54,9 +54,8 @@ static void PopulateOLSData(const std::vector<PreparedData>& d,
   }
 }
 
-std::tuple<std::vector<double>, double, double>
-sysid::CalculateFeedforwardGains(const Storage& data,
-                                 const AnalysisType& type) {
+OLSResult CalculateFeedforwardGains(const Storage& data,
+                                    const AnalysisType& type) {
   // Iterate through the data and add it to our raw vector.
   const auto& [slowForward, slowBackward, fastForward, fastBackward] = data;
 
@@ -90,23 +89,23 @@ sysid::CalculateFeedforwardGains(const Storage& data,
   // Perform OLS with accel = alpha*vel + beta*voltage + gamma*signum(vel)
   // OLS performs best with the noisiest variable as the dependent var,
   // so we regress accel in terms of the other variables.
-  auto ols = sysid::OLS(X, y);
-  double alpha = std::get<0>(ols)[0];  // -Kv/Ka
-  double beta = std::get<0>(ols)[1];   // 1/Ka
-  double gamma = std::get<0>(ols)[2];  // -Ks/Ka
+  auto ols = OLS(X, y);
+  double alpha = ols.coeffs[0];  // -Kv/Ka
+  double beta = ols.coeffs[1];   // 1/Ka
+  double gamma = ols.coeffs[2];  // -Ks/Ka
 
   // Initialize gains list with Ks, Kv, and Ka
   std::vector<double> gains{-gamma / beta, -alpha / beta, 1 / beta};
 
   if (type == analysis::kElevator) {
     // Add Kg to gains list
-    double delta = std::get<0>(ols)[3];  // -Kg/Ka
+    double delta = ols.coeffs[3];  // -Kg/Ka
     gains.emplace_back(-delta / beta);
   }
 
   if (type == analysis::kArm) {
-    double delta = std::get<0>(ols)[3];    // -Kg/Ka cos(offset)
-    double epsilon = std::get<0>(ols)[4];  // Kg/Ka sin(offset)
+    double delta = ols.coeffs[3];    // -Kg/Ka cos(offset)
+    double epsilon = ols.coeffs[4];  // Kg/Ka sin(offset)
 
     // Add Kg to gains list
     gains.emplace_back(std::hypot(delta, epsilon) / beta);
@@ -116,5 +115,7 @@ sysid::CalculateFeedforwardGains(const Storage& data,
   }
 
   // Gains are Ks, Kv, Ka, Kg (elevator/arm only), offset (arm only)
-  return std::tuple{gains, std::get<1>(ols), std::get<2>(ols)};
+  return OLSResult{gains, ols.rSquared, ols.rmse};
 }
+
+}  // namespace sysid
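To see the gain recovery above in numbers, consider a hypothetical simple-motor fit (values invented for illustration): true gains Ks = 0.5, Kv = 2.0, Ka = 0.25 give model coefficients α = −Kv/Ka = −8, β = 1/Ka = 4, and γ = −Ks/Ka = −2, and inverting them as the code does recovers Ks = −γ/β = 0.5, Kv = −α/β = 2.0, and Ka = 1/β = 0.25.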
89 changes: 64 additions & 25 deletions sysid/src/main/native/cpp/analysis/OLS.cpp
@@ -5,45 +5,84 @@
 #include "sysid/analysis/OLS.h"
 
 #include <cassert>
-#include <tuple>
-#include <vector>
+#include <cmath>
 
+#include <Eigen/Cholesky>
 #include <Eigen/Core>
 
-using namespace sysid;
+namespace sysid {
 
-std::tuple<std::vector<double>, double, double> sysid::OLS(
-    const Eigen::MatrixXd& X, const Eigen::VectorXd& y) {
+OLSResult OLS(const Eigen::MatrixXd& X, const Eigen::VectorXd& y) {
   assert(X.rows() == y.rows());
 
-  // The linear model can be written as follows:
-  // y = Xβ + u, where y is the dependent observed variable, X is the matrix
-  // of independent variables, β is a vector of coefficients, and u is a
-  // vector of residuals.
-
-  // We want to minimize u² = uᵀu = (y - Xβ)ᵀ(y - Xβ).
-  // β = (XᵀX)⁻¹Xᵀy
-
-  // Calculate β that minimizes uᵀu.
-  Eigen::MatrixXd beta = (X.transpose() * X).llt().solve(X.transpose() * y);
+  // The linear regression model can be written as follows:
+  //
+  //   y = Xβ + ε
+  //
+  // where y is the dependent observed variable, X is the matrix of independent
+  // variables, β is a vector of coefficients, and ε is a vector of residuals.
+  //
+  // We want to find the value of β that minimizes εᵀε.
+  //
+  //   ε = y − Xβ
+  //   εᵀε = (y − Xβ)ᵀ(y − Xβ)
+  //
+  //   β̂ = argmin (y − Xβ)ᵀ(y − Xβ)
+  //         β
+  //
+  // Take the partial derivative of the cost function with respect to β and set
+  // it equal to zero, then solve for β̂.
+  //
+  //   0 = −2Xᵀ(y − Xβ̂)
+  //   0 = Xᵀ(y − Xβ̂)
+  //   0 = Xᵀy − XᵀXβ̂
+  //   XᵀXβ̂ = Xᵀy
+  //   β̂ = (XᵀX)⁻¹Xᵀy
+  //
+  // XᵀX is guaranteed to be symmetric positive definite, so an LLT
+  // decomposition can be used.
+  Eigen::MatrixXd β = (X.transpose() * X).llt().solve(X.transpose() * y);
 
-  // We will now calculate R² or the coefficient of determination, which
-  // tells us how much of the total variation (variation in y) can be
-  // explained by the regression model.
-
-  // We will first calculate the sum of the squares of the error, or the
-  // variation in error (SSE).
-  double SSE = (y - X * beta).squaredNorm();
+  // Error sum of squares
+  double SSE = (y - X * β).squaredNorm();
 
-  int n = X.cols();
+  // Sample size
+  int n = X.rows();
 
-  // Now we will calculate the total variation in y, known as SSTO.
-  double SSTO = ((y.transpose() * y) - (1.0 / n) * (y.transpose() * y)).value();
+  // Number of explanatory variables
+  int p = β.rows();
 
-  double rSquared = (SSTO - SSE) / SSTO;
-  double adjRSquared = 1.0 - (1.0 - rSquared) * ((n - 1.0) / (n - 3.0));
+  // Total sum of squares (total variation in y)
+  //
+  // From slide 24 of
+  // http://www.stat.columbia.edu/~fwood/Teaching/w4315/Fall2009/lecture_11:
+  //
+  //   SSTO = yᵀy − 1/n yᵀJy
+  //
+  // Let J = I.
+  //
+  //   SSTO = yᵀy − 1/n yᵀy
+  //   SSTO = (n − 1)/n yᵀy
+  double SSTO = (n - 1.0) / n * (y.transpose() * y).value();
 
+  // R² or the coefficient of determination, which represents how much of the
+  // total variation (variation in y) can be explained by the regression model
+  double rSquared = 1.0 - SSE / SSTO;
+
+  // Adjusted R²
+  //
+  //                       n − 1
+  //   R̅² = 1 − (1 − R²) ---------
+  //                     n − p − 1
+  //
+  // See https://en.wikipedia.org/wiki/Coefficient_of_determination#Adjusted_R2
+  double adjRSquared = 1.0 - (1.0 - rSquared) * ((n - 1.0) / (n - p - 1.0));
+
+  // Root-mean-square error
   double RMSE = std::sqrt(SSE / n);
 
-  return {{beta.data(), beta.data() + beta.rows()}, adjRSquared, RMSE};
+  return {{β.data(), β.data() + β.size()}, adjRSquared, RMSE};
 }
+
+}  // namespace sysid
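A minimal sketch (not part of the commit) showing how the new OLS API could be exercised on synthetic data; it assumes Eigen 3.4's initializer-list constructors and a translation unit linked against the sysid analysis sources. Note that the rSquared field returned by OLS() holds the adjusted value computed above.

    #include <cstdio>

    #include <Eigen/Core>

    #include "sysid/analysis/OLS.h"

    int main() {
      // Noise-free samples generated from y = 2x₁ + 3x₂, so OLS should recover
      // the coefficients exactly, with adjusted R² = 1 and RMSE = 0.
      Eigen::MatrixXd X{{1.0, 0.0}, {0.0, 1.0}, {1.0, 1.0}, {2.0, 1.0}};
      Eigen::VectorXd y{{2.0, 3.0, 5.0, 7.0}};

      auto result = sysid::OLS(X, y);
      std::printf("coeffs = [%f, %f]\n", result.coeffs[0], result.coeffs[1]);
      std::printf("adjusted R² = %f, RMSE = %f\n", result.rSquared, result.rmse);
    }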
6 changes: 3 additions & 3 deletions sysid/src/main/native/cpp/view/Analyzer.cpp
@@ -49,9 +49,9 @@ void Analyzer::UpdateFeedforwardGains() {
   WPI_INFO(m_logger, "{}", "Gain calc");
   try {
     const auto& [ff, trackWidth] = m_manager->CalculateFeedforward();
-    m_ff = std::get<0>(ff);
-    m_accelRSquared = std::get<1>(ff);
-    m_accelRMSE = std::get<2>(ff);
+    m_ff = ff.coeffs;
+    m_accelRSquared = ff.rSquared;
+    m_accelRMSE = ff.rmse;
     m_trackWidth = trackWidth;
     m_settings.preset.measurementDelay =
         m_settings.type == FeedbackControllerLoopType::kPosition
sysid/src/main/native/include/sysid/analysis/AnalysisManager.h
@@ -98,7 +98,7 @@ class AnalysisManager {
   /**
    * Stores the Feedforward gains.
    */
-  std::tuple<std::vector<double>, double, double> ffGains;
+  OLSResult ffGains;
 
   /**
    * Stores the trackwidth for angular drivetrain tests.
sysid/src/main/native/include/sysid/analysis/FeedforwardAnalysis.h
@@ -8,18 +8,16 @@
 #include <vector>
 
 #include "sysid/analysis/AnalysisType.h"
+#include "sysid/analysis/OLS.h"
 #include "sysid/analysis/Storage.h"
 
 namespace sysid {
 
 /**
  * Calculates feedforward gains given the data and the type of analysis to
  * perform.
- *
- * @return Tuple containing the coefficients of the analysis along with the
- *         r-squared (coefficient of determination) and RMSE (standard deviation
- *         of the residuals) of the fit.
  */
-std::tuple<std::vector<double>, double, double> CalculateFeedforwardGains(
-    const Storage& data, const AnalysisType& type);
+OLSResult CalculateFeedforwardGains(const Storage& data,
+                                    const AnalysisType& type);
 
 }  // namespace sysid
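With the new signature, call sites read named fields instead of positional std::get indices. A hypothetical caller (PrintGains is illustrative, not part of sysid):

    #include <cstdio>

    #include "sysid/analysis/FeedforwardAnalysis.h"

    void PrintGains(const sysid::Storage& data, const sysid::AnalysisType& type) {
      // coeffs holds Ks, Kv, Ka (plus Kg and the offset for elevator/arm types).
      auto ff = sysid::CalculateFeedforwardGains(data, type);
      std::printf("Ks = %f, Kv = %f, Ka = %f (adjusted R² = %f, RMSE = %f)\n",
                  ff.coeffs[0], ff.coeffs[1], ff.coeffs[2], ff.rSquared, ff.rmse);
    }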
19 changes: 13 additions & 6 deletions sysid/src/main/native/include/sysid/analysis/OLS.h
@@ -5,22 +5,29 @@
 #pragma once
 
 #include <cstddef>
-#include <tuple>
 #include <vector>
 
 #include <Eigen/Core>
 
 namespace sysid {
 
+struct OLSResult {
+  /// Regression coefficients.
+  std::vector<double> coeffs;
+
+  /// R² (coefficient of determination)
+  double rSquared = 0.0;
+
+  /// Root-mean-square error
+  double rmse = 0.0;
+};
+
 /**
- * Performs ordinary least squares multiple regression on the provided data and
- * returns a vector of coefficients along with the r-squared (coefficient of
- * determination) and RMSE (standard deviation of the residuals) of the fit.
+ * Performs ordinary least squares multiple regression on the provided data.
  *
  * @param X The independent data in y = Xβ.
  * @param y The dependent data in y = Xβ.
  */
-std::tuple<std::vector<double>, double, double> OLS(const Eigen::MatrixXd& X,
-                                                    const Eigen::VectorXd& y);
+OLSResult OLS(const Eigen::MatrixXd& X, const Eigen::VectorXd& y);
 
 }  // namespace sysid
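Because OLSResult is an aggregate with public members, callers that prefer the old tuple-like style can still destructure it with structured bindings (illustrative):

    auto [coeffs, rSquared, rmse] = sysid::OLS(X, y);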