diff --git a/doc/source/changes.rst b/doc/source/changes.rst index e16cfe5b87..632c8da0a4 100644 --- a/doc/source/changes.rst +++ b/doc/source/changes.rst @@ -1,5 +1,10 @@ Change Log ========== +Version 4.24 +------------ +* Added :func:`~linearmodels.system.results.SystemResults.breusch_pagan` and + :func:`~linearmodels.system.results.SystemResults.likelihood_ratio` to test + whether the shock covariance is diagonal. Version 4.21 ------------ @@ -8,7 +13,7 @@ Version 4.21 inference. * Added ``rank_check`` argument to panel-data models that allows the rank check to be skipped. Estimating a model that is rank deficient may result - in unreliable estiamtes and so caution is needed if using this option. + in unreliable estimates and so caution is needed if using this option. * Changed the rank check to use :func:`numpy.linalg.lstsq` which is better aligned with parameter estimation than the :func:`numpy.linalg.svd`-based :func:`numpy.linalg.matrix_rank`. diff --git a/doc/source/names_wordlist.txt b/doc/source/names_wordlist.txt index 6aaadf5a59..e3dc3bd6f2 100644 --- a/doc/source/names_wordlist.txt +++ b/doc/source/names_wordlist.txt @@ -40,3 +40,7 @@ rubin basmann sargan wooldridge +Breusch +Pagan +breusch +pagan diff --git a/doc/source/spelling_wordlist.txt b/doc/source/spelling_wordlist.txt index 3bb2c84ada..2cac3d0cf1 100644 --- a/doc/source/spelling_wordlist.txt +++ b/doc/source/spelling_wordlist.txt @@ -203,3 +203,4 @@ numpy str debias pyhdfe +absorber diff --git a/doc/source/system/convert-lyx.cmd b/doc/source/system/convert-lyx.cmd index fb98eafab8..124847c865 100644 --- a/doc/source/system/convert-lyx.cmd +++ b/doc/source/system/convert-lyx.cmd @@ -1,4 +1,4 @@ -"C:\Program Files (x86)\LyX 2.3\bin\lyx" --force-overwrite --export latex mathematical-detail.lyx +"c:\Program Files\LyX 2.3\bin\lyx" --force-overwrite --export latex mathematical-detail.lyx pandoc -s mathematical-detail.tex -o mathematical-detail.rst copy /Y mathematical-detail.rst 
mathematical-detail-pre.txt del mathematical-detail.rst diff --git a/doc/source/system/mathematical-detail.lyx b/doc/source/system/mathematical-detail.lyx index 0cd60630c4..bd45582000 100644 --- a/doc/source/system/mathematical-detail.lyx +++ b/doc/source/system/mathematical-detail.lyx @@ -1068,6 +1068,40 @@ ic weighting formula immediately above. \end_layout +\begin_layout Subsection* +Testing Covariance and Correlations +\end_layout + +\begin_layout Standard +Two tests are available to test whether the residual covariance is diagonal. + These are useful diagnostics when considering GLS estimation. + If the tests reject the null, then the data suggest that GLS estimation + should improve efficiency as long as the regressors are not all common. + If the null is not rejected, then the covariance is not statistically different + from a diagonal covariance and there are unlikely to be gains to using + GLS. + The Breusch-Pagan test directly examines the correlations of the residuals, + and is defined as +\begin_inset Formula +\[ +N\left(\sum_{i=1}^{K}\sum_{j=i+1}^{K}\hat{\rho}_{ij}^{2}\right)\sim\chi_{K\left(K-1\right)/2}^{2}. +\] + +\end_inset + +The likelihood ratio is defined as the difference between the log determinants + of a diagonal covariance matrix and the full unrestricted covariance matrix, +\begin_inset Formula +\[ +N\left(\sum_{i=1}^{K}\ln\hat{\sigma}_{i}^{2}-\ln\left|\hat{\Sigma}\right|\right)=N\left(\ln\left|\hat{\Sigma}\odot I_{K}\right|-\ln\left|\hat{\Sigma}\right|\right)\sim\chi_{K\left(K-1\right)/2}^{2}. +\] + +\end_inset + +The asymptotic distribution of the likelihood ratio test requires homoskedastici +ty. 
+\end_layout + \begin_layout Subsection* System Measures of Fit ( \begin_inset Formula $R^{2}$ diff --git a/doc/source/system/mathematical-detail.txt b/doc/source/system/mathematical-detail.txt index 113ef3a026..3017ec8cf1 100644 --- a/doc/source/system/mathematical-detail.txt +++ b/doc/source/system/mathematical-detail.txt @@ -371,6 +371,29 @@ parameters will simplify to .. math:: \widehat{Var\left(\hat{\beta}\right)}=N^{-1}\left(\frac{X^{\prime}Z}{N}\hat{W}^{-1}\frac{Z^{\prime}X}{N}\right)^{-1}. +Testing Covariance and Correlations +----------------------------------- + +Two tests are available to test whether the residual covariance is +diagonal. These are useful diagnostics when considering GLS estimation. +If the tests reject the null, then the data suggest that GLS estimation +should improve efficiency as long as the regressors are not all common. +If the null is not rejected, then the covariance is not statistically +different from a diagonal covariance and there are unlikely to be gains +to using GLS. The Breusch-Pagan test directly examines the correlations +of the residuals, and is defined as + +.. math:: N\left(\sum_{i=1}^{K}\sum_{j=i+1}^{K}\hat{\rho}_{ij}^{2}\right)\sim\chi_{K\left(K-1\right)/2}^{2}. + +The likelihood ratio is defined as the difference between the log +determinants of a diagonal covariance matrix and the full unrestricted +covariance matrix, + +.. math:: N\left(\sum_{i=1}^{K}\ln\hat{\sigma}_{i}^{2}-\ln\left|\hat{\Sigma}\right|\right)=N\left(\ln\left|\hat{\Sigma}\odot I_{K}\right|-\ln\left|\hat{\Sigma}\right|\right)\sim\chi_{K\left(K-1\right)/2}^{2}. + +The asymptotic distribution of the likelihood ratio test requires +homoskedasticity. 
+ System Measures of Fit (:math:`R^{2}`) -------------------------------------- diff --git a/linearmodels/panel/model.py b/linearmodels/panel/model.py index ba0c913708..90388f873d 100644 --- a/linearmodels/panel/model.py +++ b/linearmodels/panel/model.py @@ -263,7 +263,7 @@ class _PanelModelBase(object): Flag indicating whether to perform a rank check on the exogenous variables to ensure that the model is identified. Skipping this check can reduce the time required to validate a model specification. - Results may be numerically instable if this check is skipped and + Results may be numerically unstable if this check is skipped and the matrix is not full rank. """ @@ -402,7 +402,7 @@ def _check_exog_rank(self) -> int: raise ValueError( "exog does not have full column rank. If you wish to proceed with " "model estimation irrespective of the numerical accuracy of " - "coefficient estiamtes, you can set rank_check=False." + "coefficient estimates, you can set rank_check=False." ) return rank_of_x @@ -804,7 +804,7 @@ class PooledOLS(_PanelModelBase): Flag indicating whether to perform a rank check on the exogenous variables to ensure that the model is identified. Skipping this check can reduce the time required to validate a model specification. - Results may be numerically instable if this check is skipped and + Results may be numerically unstable if this check is skipped and the matrix is not full rank. Notes @@ -854,7 +854,7 @@ def from_formula( Flag indicating whether to perform a rank check on the exogenous variables to ensure that the model is identified. Skipping this check can reduce the time required to validate a model - specification. Results may be numerically instable if this check + specification. Results may be numerically unstable if this check is skipped and the matrix is not full rank. 
Returns @@ -1102,7 +1102,7 @@ class PanelOLS(_PanelModelBase): Flag indicating whether to perform a rank check on the exogenous variables to ensure that the model is identified. Skipping this check can reduce the time required to validate a model specification. - Results may be numerically instable if this check is skipped and + Results may be numerically unstable if this check is skipped and the matrix is not full rank. Notes @@ -1331,7 +1331,7 @@ def from_formula( Flag indicating whether to perform a rank check on the exogenous variables to ensure that the model is identified. Skipping this check can reduce the time required to validate a model - specification. Results may be numerically instable if this check + specification. Results may be numerically unstable if this check is skipped and the matrix is not full rank. Returns @@ -2165,7 +2165,7 @@ def from_formula( Flag indicating whether to perform a rank check on the exogenous variables to ensure that the model is identified. Skipping this check can reduce the time required to validate a model - specification. Results may be numerically instable if this check + specification. Results may be numerically unstable if this check is skipped and the matrix is not full rank. Returns @@ -2463,7 +2463,7 @@ def from_formula( Flag indicating whether to perform a rank check on the exogenous variables to ensure that the model is identified. Skipping this check can reduce the time required to validate a model - specification. Results may be numerically instable if this check + specification. Results may be numerically unstable if this check is skipped and the matrix is not full rank. Returns @@ -2556,7 +2556,7 @@ def from_formula( Flag indicating whether to perform a rank check on the exogenous variables to ensure that the model is identified. Skipping this check can reduce the time required to validate a model - specification. Results may be numerically instable if this check + specification. 
Results may be numerically unstable if this check is skipped and the matrix is not full rank. Returns @@ -3028,7 +3028,7 @@ def from_formula( Flag indicating whether to perform a rank check on the exogenous variables to ensure that the model is identified. Skipping this check can reduce the time required to validate a model - specification. Results may be numerically instable if this check + specification. Results may be numerically unstable if this check is skipped and the matrix is not full rank. Returns diff --git a/linearmodels/system/results.py b/linearmodels/system/results.py index e7568e860b..19851aa01d 100644 --- a/linearmodels/system/results.py +++ b/linearmodels/system/results.py @@ -523,7 +523,7 @@ def breusch_pagan(self) -> Union[WaldTestStatistic, InvalidTestStatistic]: Returns ------- WaldTestStatistic - Test statistic for null all correlationsare zero. + Test statistic for null all correlations are zero. Notes ----- @@ -540,7 +540,11 @@ def breusch_pagan(self) -> Union[WaldTestStatistic, InvalidTestStatistic]: where :math:`\hat{\rho}_{ij}` is the sample residual correlation between series i and j. n is the sample size. It has an asymptotic - :math:`\chi^2_{k(k-1)/2}` distribution. + :math:`\chi^2_{k(k-1)/2}` distribution. See [1]_ for details. + + References + ---------- + .. [1] Greene, William H. Econometric analysis. Pearson Education, 2003. """ name = "Breusch-Pagan LM Test" resids = self.resids @@ -561,6 +565,60 @@ def breusch_pagan(self) -> Union[WaldTestStatistic, InvalidTestStatistic]: name=name, ) + def likelihood_ratio(self) -> Union[WaldTestStatistic, InvalidTestStatistic]: + r""" + Likelihood ratio test of no cross-correlation + + Returns + ------- + WaldTestStatistic + Test statistic that the covariance is diagonal. + + Notes + ----- + The null hypothesis is that the shock covariance matrix is diagonal, + and so all correlations are 0. In this case, there are no gains to + using GLS estimation in the system estimator. 
+ + When the null is rejected, there should be efficiency gains to using + GLS as long as the regressors are not common to all models. + + The LR test statistic is defined as + + .. math:: + + LR=n\left[\sum_{i=1}^{k}\log\hat{\sigma}_i^2 + -\log\left|\hat{\Sigma}\right|\right] + + where :math:`\hat{\sigma}_i^2` is the sample residual variance for + series i and :math:`\hat{\Sigma}` is the residual covariance. + n is the sample size. It has an asymptotic :math:`\chi^2_{k(k-1)/2}` + distribution. The asymptotic distribution of the likelihood ratio + test requires homoskedasticity. See [1]_ for details. + + References + ---------- + .. [1] Greene, William H. Econometric analysis. Pearson Education, 2003. + """ + name = "Likelihood Ratio Test for Diagonal Covariance" + resids = np.asarray(self.resids) + if resids.shape[1] == 1: + return InvalidTestStatistic( + "Cannot test covariance structure when the system contains a single " + "dependent variable.", + name=name, + ) + sigma = resids.T @ resids / resids.shape[0] + nobs, k = resids.shape + _, logdet = np.linalg.slogdet(sigma) + stat = nobs * (np.log(np.diag(sigma)).sum() - logdet) + return WaldTestStatistic( + stat, + "Covariance is diagonal", + k * (k - 1) // 2, + name=name, + ) + class SystemEquationResult(_CommonResults): """ diff --git a/linearmodels/tests/system/test_sur.py b/linearmodels/tests/system/test_sur.py index 39a1e2301b..1c0f87a8f1 100644 --- a/linearmodels/tests/system/test_sur.py +++ b/linearmodels/tests/system/test_sur.py @@ -878,3 +878,28 @@ def test_brequsch_pagan(k): assert_allclose(stat.pval, 1.0 - scipy.stats.chi2(3).cdf(direct)) assert "Residuals are uncorrelated" in stat.null assert "Breusch-Pagan" in str(stat) + + +@pytest.mark.parametrize("k", [1, 3]) +def test_likelihood_ratio(k): + eqns = generate_data(k=k) + mod = SUR(eqns) + res = mod.fit() + stat = res.likelihood_ratio() + if k == 1: + assert isinstance(stat, InvalidTestStatistic) + assert "Likelihood Ratio Test" in str(stat) + assert 
np.isnan(stat.stat) + return + eps = np.asarray(res.resids) + sigma = eps.T @ eps / eps.shape[0] + nobs = res.resids.shape[0] + direct = np.linalg.slogdet(sigma * np.eye(k))[1] + direct -= np.linalg.slogdet(sigma)[1] + direct *= nobs + assert isinstance(stat, WaldTestStatistic) + assert_allclose(stat.stat, direct) + assert stat.df == 3 + assert_allclose(stat.pval, 1.0 - scipy.stats.chi2(3).cdf(direct)) + assert "Covariance is diagonal" in stat.null + assert "Likelihood Ratio Test" in str(stat)