ENH: Add typing information
Add types to IV models
Change the return type of an invalid test to InvalidTest
Silence useless warnings
Allow optimization options to be passed to IVGMMCUE
bashtage committed Aug 29, 2018
1 parent 528d7b9 commit b136d28
Showing 9 changed files with 87 additions and 27 deletions.
56 changes: 39 additions & 17 deletions linearmodels/iv/model.py
@@ -18,6 +18,8 @@
KernelWeightMatrix,
OneWayClusteredWeightMatrix)
from linearmodels.iv.results import IVGMMResults, IVResults, OLSResults
from linearmodels.typing import Numeric, OptionalNumeric
from linearmodels.typing.iv import ArrayLike, OptionalArrayLike
from linearmodels.utility import (WaldTestStatistic, has_constant, inv_sqrth,
missing_warning)

@@ -105,11 +107,13 @@ class IVLIML(object):
IV2SLS, IVGMM, IVGMMCUE
"""

def __init__(self, dependent, exog, endog, instruments, *, weights=None,
fuller=0, kappa=None):
def __init__(self, dependent: ArrayLike, exog: OptionalArrayLike,
endog: OptionalArrayLike, instruments: OptionalArrayLike, *,
weights: OptionalArrayLike = None, fuller: Numeric = 0,
kappa: OptionalNumeric = None):

self.dependent = IVData(dependent, var_name='dependent')
nobs = self.dependent.shape[0]
nobs = self.dependent.shape[0] # type: int
self.exog = IVData(exog, var_name='exog', nobs=nobs)
self.endog = IVData(endog, var_name='endog', nobs=nobs)
self.instruments = IVData(instruments, var_name='instruments', nobs=nobs)
@@ -573,7 +577,9 @@ class IV2SLS(IVLIML):
IVLIML, IVGMM, IVGMMCUE
"""

def __init__(self, dependent, exog, endog, instruments, *, weights=None):
def __init__(self, dependent: ArrayLike, exog: OptionalArrayLike,
endog: OptionalArrayLike, instruments: OptionalArrayLike, *,
weights: OptionalArrayLike = None):
self._method = 'IV-2SLS'
super(IV2SLS, self).__init__(dependent, exog, endog, instruments,
weights=weights, fuller=0, kappa=1)
@@ -675,8 +681,10 @@ class IVGMM(IVLIML):
IV2SLS, IVLIML, IVGMMCUE
"""

def __init__(self, dependent, exog, endog, instruments, *, weights=None,
weight_type='robust', **weight_config):
def __init__(self, dependent: ArrayLike, exog: OptionalArrayLike,
endog: OptionalArrayLike, instruments: OptionalArrayLike, *,
weights: OptionalArrayLike = None,
weight_type: str = 'robust', **weight_config):
self._method = 'IV-GMM'
self._result_container = IVGMMResults
super(IVGMM, self).__init__(dependent, exog, endog, instruments, weights=weights)
@@ -914,8 +922,10 @@ class IVGMMCUE(IVGMM):
IV2SLS, IVLIML, IVGMM
"""

def __init__(self, dependent, exog, endog, instruments, *, weights=None,
weight_type='robust', **weight_config):
def __init__(self, dependent: ArrayLike, exog: OptionalArrayLike,
endog: OptionalArrayLike, instruments: OptionalArrayLike, *,
weights: OptionalArrayLike = None,
weight_type: str = 'robust', **weight_config):
self._method = 'IV-GMM-CUE'
super(IVGMMCUE, self).__init__(dependent, exog, endog, instruments, weights=weights,
weight_type=weight_type, **weight_config)
@@ -1017,7 +1027,7 @@ def j(self, params, x, y, z):
g_bar = (z * eps).mean(0)
return nobs * g_bar.T @ w @ g_bar.T
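For reference, `j` evaluates the continuously updated GMM objective; since `g_bar` is one-dimensional, the transposes are no-ops and the result is the scalar

    J(\beta) = n \, \bar{g}(\beta)' W(\beta) \, \bar{g}(\beta), \qquad \bar{g}(\beta) = n^{-1} Z'(y - X\beta)

where `w` = W(β) is the inverse moment covariance, re-estimated at every trial β.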

def estimate_parameters(self, starting, x, y, z, display=False):
def estimate_parameters(self, starting, x, y, z, display=False, opt_options=None):
r"""
Parameters
----------
@@ -1031,6 +1041,9 @@ def estimate_parameters(self, starting, x, y, z, display=False):
Instrument matrix (nobs by ninstr)
display : bool
Flag indicating whether to display iterative optimizer output
opt_options : dict, optional
Dictionary containing additional keyword arguments to pass to
scipy.optimize.minimize.
Returns
-------
@@ -1047,11 +1060,18 @@ def estimate_parameters(self, starting, x, y, z, display=False):
scipy.optimize.minimize
"""
args = (x, y, z)
res = minimize(self.j, starting, args=args, options={'disp': display})
opt_options = {} if opt_options is None else opt_options
options = {'disp': display}
if 'options' in opt_options:
opt_options = opt_options.copy()
options.update(opt_options.pop('options'))

res = minimize(self.j, starting, args=args, options=options, **opt_options)

return res.x[:, None], res.nit
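The merge above is worth a note: a user-supplied 'options' entry is folded into the display flag rather than replacing it, and the dict is copied so the caller's argument is not mutated. A minimal sketch with hypothetical values:

opt_options = {'method': 'L-BFGS-B', 'options': {'maxiter': 500}}
options = {'disp': True}
if 'options' in opt_options:
    opt_options = opt_options.copy()            # do not mutate the caller's dict
    options.update(opt_options.pop('options'))
# options is now {'disp': True, 'maxiter': 500};
# opt_options is now {'method': 'L-BFGS-B'} and is splatted into minimize(**opt_options)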

def fit(self, *, starting=None, display=False, cov_type='robust', **cov_config):
def fit(self, *, starting=None, display=False, cov_type='robust', opt_options=None,
**cov_config):
r"""
Estimate model parameters
@@ -1064,6 +1084,10 @@ def fit(self, *, starting=None, display=False, cov_type='robust', **cov_config):
Flag indicating whether to display optimization output
cov_type : str, optional
Name of covariance estimator to use
opt_options : dict, optional
Additional options to pass to scipy.optimize.minimize when
optimizing the objective function. If not provided, defers to
scipy to choose an appropriate optimizer.
**cov_config
Additional parameters to pass to covariance estimator
@@ -1080,10 +1104,6 @@ def fit(self, *, starting=None, display=False, cov_type='robust', **cov_config):
is provided.
Starting values are computed by IVGMM.
.. todo::
* Expose method to pass optimization options
"""

wy, wx, wz = self._wy, self._wx, self._wz
@@ -1103,7 +1123,8 @@ def fit(self, *, starting=None, display=False, cov_type='robust', **cov_config):
if len(starting) != self.exog.shape[1] + self.endog.shape[1]:
raise ValueError('starting does not have the correct number '
'of values')
params, iters = self.estimate_parameters(starting, wx, wy, wz, display)
params, iters = self.estimate_parameters(starting, wx, wy, wz, display,
opt_options=opt_options)
eps = wy - wx @ params
wmat = inv(weight_matrix(wx, wz, eps))
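Taken together, the new plumbing allows calls like the following — a hedged usage sketch in which dep, exog, endog and instr stand for any conformable array-like inputs:

from linearmodels.iv import IVGMMCUE

mod = IVGMMCUE(dep, exog, endog, instr)
# Any scipy.optimize.minimize keyword is accepted; 'options' is merged with
# the display flag rather than overwriting it.
res = mod.fit(opt_options={'method': 'L-BFGS-B', 'options': {'maxiter': 1000}})
print(res.iterations)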

@@ -1140,6 +1161,7 @@ class _OLS(IVLIML):
statsmodels.regression.linear_model.GLS
"""

def __init__(self, dependent, exog, *, weights=None):
def __init__(self, dependent: ArrayLike, exog: OptionalArrayLike, *,
weights: OptionalArrayLike = None):
super(_OLS, self).__init__(dependent, exog, None, None, weights=weights, kappa=0.0)
self._result_container = OLSResults
13 changes: 5 additions & 8 deletions linearmodels/iv/results.py
@@ -220,13 +220,13 @@ def cov_type(self):
"""Covariance estimator used"""
return self._cov_type

@property
@cached_property
def std_errors(self):
"""Estimated parameter standard errors"""
std_errors = sqrt(diag(self.cov))
return Series(std_errors, index=self._vars, name='stderr')

@property
@cached_property
def tstats(self):
"""Parameter t-statistics"""
return Series(self._params / self.std_errors, name='tstat')
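Switching std_errors and tstats from property to cached_property means each is computed once per results instance instead of on every access. A minimal sketch of the pattern — not necessarily linearmodels' exact implementation:

class cached_property(object):
    """Non-data descriptor: computes once, then caches on the instance."""

    def __init__(self, func):
        self.func = func

    def __get__(self, obj, objtype=None):
        if obj is None:
            return self
        value = self.func(obj)
        # Shadow the descriptor; later lookups hit the instance dict directly.
        obj.__dict__[self.func.__name__] = value
        return value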
@@ -834,12 +834,10 @@ def wooldridge_overid(self):
instruments = self.model.instruments
nobs, nendog = endog.shape
ninstr = instruments.shape[1]
name = 'Wooldridge\'s score test of overidentification'
if ninstr - nendog == 0:
import warnings
warnings.warn('Test requires more instruments than '
'endogenous variables',
UserWarning)
return WaldTestStatistic(0, 'Test is not feasible.', 1, name='Infeasible test.')
return InvalidTestStatistic('Test requires more instruments than '
'endogenous variables.', name=name)

endog_hat = proj(endog.ndarray, c_[exog.ndarray, instruments.ndarray])
q = instruments.ndarray[:, :(ninstr - nendog)]
@@ -850,7 +848,6 @@ def wooldridge_overid(self):
stat = res.nobs * res.rsquared
df = ninstr - nendog
null = 'Model is not overidentified.'
name = 'Wooldridge\'s score test of overidentification'
return WaldTestStatistic(stat, null, df, name=name)

@cached_property
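Per the commit message, an infeasible over-identification test now returns an InvalidTestStatistic instead of a zero-valued WaldTestStatistic, so downstream code no longer sees a spuriously feasible result. A hedged sketch of the calling pattern, assuming InvalidTestStatistic keeps WaldTestStatistic's stat/pval/null attributes with a NaN statistic:

import numpy as np

res = results.wooldridge_overid   # hypothetical IVResults instance
if np.isnan(res.stat):
    print('test infeasible:', res.null)
else:
    print(res.stat, res.pval)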
18 changes: 18 additions & 0 deletions linearmodels/tests/iv/test_model.py
@@ -342,3 +342,21 @@ def test_gmm_str(data):
str(mod.fit(cov_type='robust'))
str(mod.fit(cov_type='clustered', clusters=data.clusters))
str(mod.fit(cov_type='kernel'))


def test_gmm_cue_optimization_options(data):
mod = IVGMMCUE(data.dep, data.exog, data.endog, data.instr)
res_none = mod.fit(display=False)
opt_options = dict(method='BFGS', options={'disp': False})
res_bfgs = mod.fit(display=False, opt_options=opt_options)
opt_options = dict(method='L-BFGS-B', options={'disp': False})
res_lbfgsb = mod.fit(display=False, opt_options=opt_options)
assert res_none.iterations > 2
assert res_bfgs.iterations > 2
assert res_lbfgsb.iterations > 2

mod2 = IVGMM(data.dep, data.exog, data.endog, data.instr)
res2 = mod2.fit()
assert res_none.j_stat.stat <= res2.j_stat.stat
assert res_bfgs.j_stat.stat <= res2.j_stat.stat
assert res_lbfgsb.j_stat.stat <= res2.j_stat.stat
2 changes: 1 addition & 1 deletion linearmodels/tests/panel/test_panel_ols.py
@@ -31,7 +31,7 @@ def data(request):
@pytest.fixture(params=perms, ids=ids)
def large_data(request):
missing, datatype, const = request.param
return generate_data(missing, datatype, const=const, ntk=(51, 30, 5), other_effects=2)
return generate_data(missing, datatype, const=const, ntk=(51, 71, 5), other_effects=2)


perms = list(product(missing, datatypes))
2 changes: 2 additions & 0 deletions linearmodels/tests/panel/test_results.py
@@ -91,6 +91,7 @@ def test_incorrect_type(data):
compare(dict(model1=res, model2=res2))


@pytest.mark.filterwarnings('ignore::linearmodels.utility.MissingValueWarning')
def test_predict(generated_data):
mod = PanelOLS(generated_data.y, generated_data.x, entity_effects=True)
res = mod.fit()
@@ -124,6 +125,7 @@ def test_predict(generated_data):
assert pred.shape == (PanelData(generated_data.y).dataframe.shape[0], 3)


@pytest.mark.filterwarnings('ignore::linearmodels.utility.MissingValueWarning')
def test_predict_no_selection(generated_data):
mod = PanelOLS(generated_data.y, generated_data.x, entity_effects=True)
res = mod.fit()
2 changes: 2 additions & 0 deletions linearmodels/tests/system/test_sur.py
@@ -637,6 +637,7 @@ def test_fitted(data):
assert_frame_equal(expected, res.fitted_values)


@pytest.mark.filterwarnings('ignore::linearmodels.utility.MissingValueWarning')
def test_predict(missing_data):
mod = SUR(missing_data)
res = mod.fit()
@@ -674,6 +675,7 @@ def test_predict(missing_data):
assert pred[key].shape[0] == nobs


@pytest.mark.filterwarnings('ignore::linearmodels.utility.MissingValueWarning')
def test_predict_error(missing_data):
mod = SUR(missing_data)
res = mod.fit()
4 changes: 4 additions & 0 deletions linearmodels/typing/__init__.py
@@ -0,0 +1,4 @@
from typing import Union

Numeric = Union[int, float]
OptionalNumeric = Union[int, float, None]
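A hedged usage sketch for the new aliases; note OptionalNumeric is equivalent to typing.Optional[Numeric]. The helper below is hypothetical:

from linearmodels.typing import Numeric, OptionalNumeric


def scale(value: Numeric, factor: OptionalNumeric = None) -> float:
    """Hypothetical helper showing the aliases in a signature."""
    return float(value) if factor is None else float(value * factor)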
15 changes: 15 additions & 0 deletions linearmodels/typing/iv.py
@@ -0,0 +1,15 @@
from typing import Union

import numpy as np
import pandas as pd

base_data_types = [np.ndarray, pd.DataFrame, pd.Series]
try:
import xarray as xr

ArrayLike = Union[np.ndarray, xr.DataArray, pd.DataFrame, pd.Series]

except ImportError:
ArrayLike = Union[np.ndarray, pd.DataFrame, pd.Series]

OptionalArrayLike = Union[ArrayLike, None]
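Any member of the union is accepted wherever the IV models annotate with ArrayLike/OptionalArrayLike; xr.DataArray is included only when xarray is importable. A hedged sketch with a hypothetical helper:

import numpy as np
import pandas as pd

from linearmodels.typing.iv import ArrayLike


def nobs(data: ArrayLike) -> int:
    """Hypothetical helper: number of rows in any supported container."""
    return np.asarray(data).shape[0]


nobs(np.zeros((10, 2)))     # 10
nobs(pd.Series(range(10)))  # 10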
2 changes: 1 addition & 1 deletion linearmodels/utility.py
@@ -439,7 +439,7 @@ def __init__(self, results):
def _get_series_property(self, name):
out = ([(k, getattr(v, name)) for k, v in self._results.items()])
cols = [v[0] for v in out]
values = concat([v[1] for v in out], 1)
values = concat([v[1] for v in out], 1, sort=True)
values.columns = cols
return values
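Passing sort=True makes sorting of the non-concatenation axis explicit, which silences the FutureWarning pandas 0.23+ emits when Series with unaligned indices are concatenated column-wise — one of the "useless warnings" the commit message refers to. A small sketch:

import pandas as pd

a = pd.Series([1.0, 2.0], index=['b', 'a'])
b = pd.Series([3.0, 4.0], index=['a', 'c'])
# Without sort=True, pandas 0.23+ warns that the index will be sorted.
merged = pd.concat([a, b], 1, sort=True)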

