Merge pull request #340 from bashtage/system-lr

ENH: Add likelihood ratio test for SUR
bashtage · Apr 8, 2021 · 5c2b663 · 5c2b663
2 parents 2d81262 + 8788a1b
commit 5c2b663
Show file tree

Hide file tree

Showing 9 changed files with 164 additions and 14 deletions.
diff --git a/doc/source/changes.rst b/doc/source/changes.rst
@@ -1,5 +1,10 @@
 Change Log
 ==========
+Version 4.24
+------------
+* Added :func:`~linearmodels.system.results.SystemResults.breusch_pagan` and
+  :func:`~linearmodels.system.results.SystemResults.likelihood_ratio` to test
+  whether the shock covariance is diagonal.
 
 Version 4.21
 ------------
@@ -8,7 +13,7 @@ Version 4.21
   inference.
 * Added ``rank_check`` argument to panel-data models that allows the rank
   check to be skipped. Estimating a model that is rank deficient may result
-  in unreliable estiamtes and so caution is needed if using this option.
+  in unreliable estimates and so caution is needed if using this option.
 * Changed the rank check to use :func:`numpy.linalg.lstsq` which is better
   aligned with parameter estimation than the :func:`numpy.linalg.svd`-based
   :func:`numpy.linalg.matrix_rank`.

diff --git a/doc/source/names_wordlist.txt b/doc/source/names_wordlist.txt
@@ -40,3 +40,7 @@ rubin
 basmann
 sargan
 wooldridge
+Breusch
+Pagan
+breusch
+pagan
diff --git a/doc/source/spelling_wordlist.txt b/doc/source/spelling_wordlist.txt
@@ -203,3 +203,4 @@ numpy
 str
 debias
 pyhdfe
+absorber
diff --git a/doc/source/system/convert-lyx.cmd b/doc/source/system/convert-lyx.cmd
@@ -1,4 +1,4 @@
-"C:\Program Files (x86)\LyX 2.3\bin\lyx" --force-overwrite --export latex mathematical-detail.lyx
+"c:\Program Files\LyX 2.3\bin   \lyx" --force-overwrite --export latex mathematical-detail.lyx
 pandoc -s mathematical-detail.tex -o mathematical-detail.rst
 copy /Y mathematical-detail.rst mathematical-detail-pre.txt
 del mathematical-detail.rst

diff --git a/doc/source/system/mathematical-detail.lyx b/doc/source/system/mathematical-detail.lyx
@@ -1068,6 +1068,40 @@ ic weighting formula immediately above.
 
 \end_layout
 
+\begin_layout Subsection*
+Testing Covariance and Correlations
+\end_layout
+
+\begin_layout Standard
+Two tests are available to test whether the residual covariance is diagonal.
+ These are useful diagnostics when considering GLS estimation.
+ If the tests reject the null, then the data suggest that GLS estimation
+ should improve efficiency as long as the regressors are not all common.
+ If the null is not rejected, then the covariance is not statistically different
+ from a diagonal covariance and there are unlikely to be gains to using
+ GLS.
+ The Breusch-Pagan test directly examines the correlations of the residuals,
+ and is defined as 
+\begin_inset Formula 
+\[
+N\left(\sum_{i=1}^{K}\sum_{j=i+1}^{K}\hat{\rho}\right)\sim\chi_{K\left(K-1\right)/2}^{2}.
+\]
+
+\end_inset
+
+The likelihood ratio is defined as the difference between the log determinants
+ of a diagonal covariance matrix and the full unrestricted covariance matrix,
+\begin_inset Formula 
+\[
+N\left(\sum_{i=1}^{K}\ln\hat{\sigma}_{i}^{2}-\ln\left|\hat{\Sigma}\right|\right)=N\left(\sum_{i=1}^{K}\ln\left|\hat{\Sigma}\odot I_{K}\right|-\ln\left|\hat{\Sigma}\right|\right)\sim\chi_{K\left(K-1\right)/2}^{2}.
+\]
+
+\end_inset
+
+The asymptotic distribution of the likelihood ratio test requires homoskedastici
+ty.
+\end_layout
+
 \begin_layout Subsection*
 System Measures of Fit (
 \begin_inset Formula $R^{2}$

diff --git a/doc/source/system/mathematical-detail.txt b/doc/source/system/mathematical-detail.txt
@@ -371,6 +371,29 @@ parameters will simplify to
 
 .. math:: \widehat{Var\left(\hat{\beta}\right)}=N^{-1}\left(\frac{X^{\prime}Z}{N}\hat{W}^{-1}\frac{Z^{\prime}X}{N}\right)^{-1}.
 
+Testing Covariance and Correlations
+-----------------------------------
+
+Two tests are available to test whether the residual covariance is
+diagonal. These are useful diagnostics when considering GLS estimation.
+If the tests reject the null, then the data suggest that GLS estimation
+should improve efficiency as long as the regressors are not all common.
+If the null is not rejected, then the covariance is not statistically
+different from a diagonal covariance and there are unlikely to be gains
+to using GLS. The Breusch-Pagan test directly examines the correlations
+of the residuals, and is defined as
+
+.. math:: N\left(\sum_{i=1}^{K}\sum_{j=i+1}^{K}\hat{\rho}\right)\sim\chi_{K\left(K-1\right)/2}^{2}.
+
+The likelihood ratio is defined as the difference between the log
+determinants of a diagonal covariance matrix and the full unrestricted
+covariance matrix,
+
+.. math:: N\left(\sum_{i=1}^{K}\ln\hat{\sigma}_{i}^{2}-\ln\left|\hat{\Sigma}\right|\right)=N\left(\sum_{i=1}^{K}\ln\left|\hat{\Sigma}\odot I_{K}\right|-\ln\left|\hat{\Sigma}\right|\right)\sim\chi_{K\left(K-1\right)/2}^{2}.
+
+The asymptotic distribution of the likelihood ratio test requires
+homoskedasticity.
+
 System Measures of Fit (:math:`R^{2}`)
 --------------------------------------
 

diff --git a/linearmodels/panel/model.py b/linearmodels/panel/model.py
@@ -263,7 +263,7 @@ class _PanelModelBase(object):
         Flag indicating whether to perform a rank check on the exogenous
         variables to ensure that the model is identified. Skipping this
         check can reduce the time required to validate a model specification.
-        Results may be numerically instable if this check is skipped and
+        Results may be numerically unstable if this check is skipped and
         the matrix is not full rank.
     """
 
@@ -402,7 +402,7 @@ def _check_exog_rank(self) -> int:
             raise ValueError(
                 "exog does not have full column rank. If you wish to proceed with "
                 "model estimation irrespective of the numerical accuracy of "
-                "coefficient estiamtes, you can set rank_check=False."
+                "coefficient estimates, you can set rank_check=False."
             )
         return rank_of_x
 
@@ -804,7 +804,7 @@ class PooledOLS(_PanelModelBase):
         Flag indicating whether to perform a rank check on the exogenous
         variables to ensure that the model is identified. Skipping this
         check can reduce the time required to validate a model specification.
-        Results may be numerically instable if this check is skipped and
+        Results may be numerically unstable if this check is skipped and
         the matrix is not full rank.
 
     Notes
@@ -854,7 +854,7 @@ def from_formula(
             Flag indicating whether to perform a rank check on the exogenous
             variables to ensure that the model is identified. Skipping this
             check can reduce the time required to validate a model
-            specification. Results may be numerically instable if this check
+            specification. Results may be numerically unstable if this check
             is skipped and the matrix is not full rank.
 
         Returns
@@ -1102,7 +1102,7 @@ class PanelOLS(_PanelModelBase):
         Flag indicating whether to perform a rank check on the exogenous
         variables to ensure that the model is identified. Skipping this
         check can reduce the time required to validate a model specification.
-        Results may be numerically instable if this check is skipped and
+        Results may be numerically unstable if this check is skipped and
         the matrix is not full rank.
 
     Notes
@@ -1331,7 +1331,7 @@ def from_formula(
             Flag indicating whether to perform a rank check on the exogenous
             variables to ensure that the model is identified. Skipping this
             check can reduce the time required to validate a model
-            specification. Results may be numerically instable if this check
+            specification. Results may be numerically unstable if this check
             is skipped and the matrix is not full rank.
 
         Returns
@@ -2165,7 +2165,7 @@ def from_formula(
             Flag indicating whether to perform a rank check on the exogenous
             variables to ensure that the model is identified. Skipping this
             check can reduce the time required to validate a model
-            specification. Results may be numerically instable if this check
+            specification. Results may be numerically unstable if this check
             is skipped and the matrix is not full rank.
 
         Returns
@@ -2463,7 +2463,7 @@ def from_formula(
             Flag indicating whether to perform a rank check on the exogenous
             variables to ensure that the model is identified. Skipping this
             check can reduce the time required to validate a model
-            specification. Results may be numerically instable if this check
+            specification. Results may be numerically unstable if this check
             is skipped and the matrix is not full rank.
 
         Returns
@@ -2556,7 +2556,7 @@ def from_formula(
             Flag indicating whether to perform a rank check on the exogenous
             variables to ensure that the model is identified. Skipping this
             check can reduce the time required to validate a model
-            specification. Results may be numerically instable if this check
+            specification. Results may be numerically unstable if this check
             is skipped and the matrix is not full rank.
 
         Returns
@@ -3028,7 +3028,7 @@ def from_formula(
             Flag indicating whether to perform a rank check on the exogenous
             variables to ensure that the model is identified. Skipping this
             check can reduce the time required to validate a model
-            specification. Results may be numerically instable if this check
+            specification. Results may be numerically unstable if this check
             is skipped and the matrix is not full rank.
 
         Returns

diff --git a/linearmodels/system/results.py b/linearmodels/system/results.py
@@ -523,7 +523,7 @@ def breusch_pagan(self) -> Union[WaldTestStatistic, InvalidTestStatistic]:
         Returns
         -------
         WaldTestStatistic
-            Test statistic for null all correlationsare zero.
+            Test statistic for null all correlations are zero.
 
         Notes
         -----
@@ -540,7 +540,11 @@ def breusch_pagan(self) -> Union[WaldTestStatistic, InvalidTestStatistic]:
 
         where :math:`\hat{\rho}_{ij}` is the sample residual correlation
         between series i and j. n is the sample size. It has an asymptotic
-        :math:`\chi^2_{k(k-1)/2}` distribution.
+        :math:`\chi^2_{k(k-1)/2}` distribution. See [1]_ for details.
+
+        References
+        ----------
+        .. [1] Greene, William H. Econometric analysis. Pearson Education, 2003.
         """
         name = "Breusch-Pagan LM Test"
         resids = self.resids
@@ -561,6 +565,60 @@ def breusch_pagan(self) -> Union[WaldTestStatistic, InvalidTestStatistic]:
             name=name,
         )
 
+    def likelihood_ratio(self) -> Union[WaldTestStatistic, InvalidTestStatistic]:
+        r"""
+        Likelihood ratio test of no cross-correlation
+
+        Returns
+        -------
+        WaldTestStatistic
+            Test statistic that the covariance is diagonal.
+
+        Notes
+        -----
+        The null hypothesis is that the shock covariance matrix is diagonal,
+        and so all correlations are 0. In this case, there are no gains to
+        using GLS estimation in the system estimator.
+
+        When the null is rejected, there should be efficiency gains to using
+        GLS as long the regressors are not common to all models.
+
+        The LR test statistic is defined as
+
+        .. math::
+
+           LR=n\left[\sum_{i=1}^{k}\log\hat{\sigma}_i^2
+              -\log\left|\hat{\Sigma}\right|\right]
+
+        where :math:`\hat{\sigma}_i^2` is the sample residual variance for
+        series i and :math:`\hat{\Sigma}` is the residual covariance.
+        n is the sample size. It has an asymptotic :math:`\chi^2_{k(k-1)/2}`
+        distribution. The asymptotic distribution of the likelihood ratio
+        test requires homoskedasticity. See [1]_ for details.
+
+        References
+        ----------
+        .. [1] Greene, William H. Econometric analysis. Pearson Education, 2003.
+        """
+        name = "Likelihood Ratio Test for Diagonal Covariance"
+        resids = np.asarray(self.resids)
+        if resids.shape[1] == 1:
+            return InvalidTestStatistic(
+                "Cannot test covariance structure when the system contains a single "
+                "dependent variable.",
+                name=name,
+            )
+        sigma = resids.T @ resids / resids.shape[0]
+        nobs, k = resids.shape
+        _, logdet = np.linalg.slogdet(sigma)
+        stat = nobs * (np.log(np.diag(sigma)).sum() - logdet)
+        return WaldTestStatistic(
+            stat,
+            "Covariance is diagonal",
+            k * (k - 1) // 2,
+            name=name,
+        )
+
 
 class SystemEquationResult(_CommonResults):
     """

diff --git a/linearmodels/tests/system/test_sur.py b/linearmodels/tests/system/test_sur.py
@@ -878,3 +878,28 @@ def test_brequsch_pagan(k):
     assert_allclose(stat.pval, 1.0 - scipy.stats.chi2(3).cdf(direct))
     assert "Residuals are uncorrelated" in stat.null
     assert "Breusch-Pagan" in str(stat)
+
+
+@pytest.mark.parametrize("k", [1, 3])
+def test_likelihood_ratio(k):
+    eqns = generate_data(k=k)
+    mod = SUR(eqns)
+    res = mod.fit()
+    stat = res.likelihood_ratio()
+    if k == 1:
+        assert isinstance(stat, InvalidTestStatistic)
+        assert "Likelihood Ratio Test" in str(stat)
+        assert np.isnan(stat.stat)
+        return
+    eps = np.asarray(res.resids)
+    sigma = eps.T @ eps / eps.shape[0]
+    nobs = res.resids.shape[0]
+    direct = np.linalg.slogdet(sigma * np.eye(k))[1]
+    direct -= np.linalg.slogdet(sigma)[1]
+    direct *= nobs
+    assert isinstance(stat, WaldTestStatistic)
+    assert_allclose(stat.stat, direct)
+    assert stat.df == 3
+    assert_allclose(stat.pval, 1.0 - scipy.stats.chi2(3).cdf(direct))
+    assert "Covariance is diagonal" in stat.null
+    assert "Likelihood Ratio Test" in str(stat)