From e3e927fbe8d3d5ab02938fd7e8c5d68c02743409 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Wed, 29 Aug 2018 13:11:27 +0100 Subject: [PATCH 1/3] RLS: Release 4.9 commit Prepare for 4.9 release --- .lgtm.yml | 6 ++++++ README.md | 2 ++ doc/source/changes.rst | 8 ++++++-- doc/source/plan.rst | 4 ++++ linearmodels/panel/data.py | 5 ++--- linearmodels/panel/results.py | 3 +-- linearmodels/system/results.py | 3 +-- 7 files changed, 22 insertions(+), 9 deletions(-) create mode 100644 .lgtm.yml diff --git a/.lgtm.yml b/.lgtm.yml new file mode 100644 index 0000000000..94e7cac309 --- /dev/null +++ b/.lgtm.yml @@ -0,0 +1,6 @@ +extraction: + python: + index: + exclude: + - versioneer.py + - linearmodels/_version.py diff --git a/README.md b/README.md index f84305d033..d2a0e7a831 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ [![Build Status](https://travis-ci.org/bashtage/linearmodels.svg?branch=master)](https://travis-ci.org/bashtage/linearmodels) [![codecov](https://codecov.io/gh/bashtage/linearmodels/branch/master/graph/badge.svg)](https://codecov.io/gh/bashtage/linearmodels) +[![Codacy Badge](https://api.codacy.com/project/badge/Grade/c771bce50a164b6fa71c344b374f140d)](https://www.codacy.com/app/bashtage/linearmodels?utm_source=github.com&utm_medium=referral&utm_content=bashtage/linearmodels&utm_campaign=Badge_Grade) +[![codebeat badge](https://codebeat.co/badges/aaae2fb4-72b5-4a66-97cd-77b93488f243)](https://codebeat.co/projects/github-com-bashtage-linearmodels-master) Linear (regression) models for Python. Extends [statsmodels](http://www.statsmodels.org) with Panel regression, diff --git a/doc/source/changes.rst b/doc/source/changes.rst index d85f676b05..eafbd7ccc2 100644 --- a/doc/source/changes.rst +++ b/doc/source/changes.rst @@ -1,8 +1,12 @@ Change Log ---------- -Since 4.8 -========= +Version 4.9 +=========== +* Changed the return type of Wooldridge's over identification test when + invalid to `InvalidTestStatistic` +* Add typing information to IV models +* Allow optimization parameters to be passed to `IVGMMCUE` * Removed internal use of pandas Panel * Improved performance in panel models when using `from_formula` * Switched to retaining index column names when original input index is named diff --git a/doc/source/plan.rst b/doc/source/plan.rst index e19eb8c08e..bbcf28d887 100644 --- a/doc/source/plan.rst +++ b/doc/source/plan.rst @@ -47,6 +47,10 @@ System Estimation ================= * Seemingly Unrelated Regression (SUR) Estimator - :class:`linearmodels.system.model.SUR` + - Multivariate OLS is supported as a special case of SUR. This method does + not perform well on large datasets and should be improved by a special + purpose implementation. + Instrumental Variable Estimators ******************************** * Three-stage Least Squares (3SLS) Estimator - :class:`linearmodels.system.model.IV3SLS` diff --git a/linearmodels/panel/data.py b/linearmodels/panel/data.py index 744b888709..cdee7b99f0 100644 --- a/linearmodels/panel/data.py +++ b/linearmodels/panel/data.py @@ -2,7 +2,6 @@ import numpy as np import pandas as pd -from numpy import ndarray from pandas import DataFrame, Panel, Series from linearmodels.compat.numpy import lstsq @@ -153,7 +152,7 @@ def __init__(self, x, var_name='x', convert_dummies=True, drop_first=True, copy= x = x.dataframe self._original = x - if not isinstance(x, (Series, DataFrame, Panel, ndarray)): + if not isinstance(x, (Series, DataFrame, Panel, np.ndarray)): try: from xarray import DataArray if isinstance(x, DataArray): @@ -191,7 +190,7 @@ def __init__(self, x, var_name='x', convert_dummies=True, drop_first=True, copy= self._frame = DataFrame({var_name: x.T.stack(dropna=False)}) else: self._frame = x.swapaxes(1, 2).to_frame(filter_observations=False) - elif isinstance(x, ndarray): + elif isinstance(x, np.ndarray): if x.ndim not in (2, 3): raise ValueError('2 or 3-d array required for numpy input') if x.ndim == 2: diff --git a/linearmodels/panel/results.py b/linearmodels/panel/results.py index 05427a7e69..ba5ed9b451 100644 --- a/linearmodels/panel/results.py +++ b/linearmodels/panel/results.py @@ -1,7 +1,6 @@ import datetime as dt import numpy as np -from numpy import diag, sqrt from pandas import DataFrame, Series, concat from scipy import stats from statsmodels.iolib.summary import SimpleTable, fmt_2cols, fmt_params @@ -67,7 +66,7 @@ def cov(self): @property def std_errors(self): """Estimated parameter standard errors""" - return Series(sqrt(diag(self.cov)), self._var_names, name='std_error') + return Series(np.sqrt(np.diag(self.cov)), self._var_names, name='std_error') @property def tstats(self): diff --git a/linearmodels/system/results.py b/linearmodels/system/results.py index 3c6d83651b..a6504c22f8 100644 --- a/linearmodels/system/results.py +++ b/linearmodels/system/results.py @@ -1,7 +1,6 @@ import datetime as dt import numpy as np -from numpy import diag, sqrt from pandas import DataFrame, Series, concat from scipy import stats from statsmodels.iolib.summary import SimpleTable, fmt_2cols @@ -76,7 +75,7 @@ def params(self): @property def std_errors(self): """Estimated parameter standard errors""" - std_errors = sqrt(diag(self.cov)) + std_errors = np.sqrt(np.diag(self.cov)) return Series(std_errors, index=self._param_names, name='stderr') @property From 7f5f5ac68ae20788de5cd290bbaaa238f724ccb2 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Wed, 29 Aug 2018 13:49:09 +0100 Subject: [PATCH 2/3] CLN: Clean pandas compat Ensure concat works correctly Move testing out of compat --- linearmodels/compat/pandas.py | 31 +++- linearmodels/iv/data.py | 4 +- linearmodels/panel/data.py | 4 +- .../tests/asset_pricing/test_formulas.py | 6 +- linearmodels/tests/iv/test_data.py | 2 +- linearmodels/tests/iv/test_formulas.py | 15 +- linearmodels/tests/iv/test_results.py | 8 +- linearmodels/tests/panel/_utility.py | 28 ++-- .../panel/results/generate-panel-data.py | 3 +- linearmodels/tests/panel/test_data.py | 141 +++++++++--------- linearmodels/tests/panel/test_formula.py | 18 +-- linearmodels/tests/panel/test_results.py | 2 +- .../system/results/execute-stata-3sls.py | 4 +- .../tests/system/results/generate_data.py | 3 +- linearmodels/tests/system/test_3sls.py | 16 +- .../tests/system/test_3sls_against_stata.py | 4 +- linearmodels/tests/system/test_formulas.py | 2 +- linearmodels/tests/system/test_gmm.py | 37 ++--- linearmodels/tests/system/test_sur.py | 2 +- 19 files changed, 177 insertions(+), 153 deletions(-) diff --git a/linearmodels/compat/pandas.py b/linearmodels/compat/pandas.py index 3dd8f7dfd3..34a029d779 100644 --- a/linearmodels/compat/pandas.py +++ b/linearmodels/compat/pandas.py @@ -1,3 +1,27 @@ +from distutils.version import LooseVersion + +import pandas as pd + +PD_LT_023 = LooseVersion(pd.__version__) < LooseVersion('0.23') + + +def concat(*args, **kwargs): + """ + Shim around pandas concat that passes sort if allowed + + See pandas.compat + """ + if PD_LT_023 and 'sort' in kwargs: + kwargs = kwargs.copy() + del kwargs['sort'] + else: + if 'sort' not in kwargs: + kwargs = kwargs.copy() + kwargs['sort'] = True + + return pd.concat(*args, **kwargs) + + try: from pandas.api.types import (is_numeric_dtype, is_categorical, is_string_dtype, is_categorical_dtype, @@ -24,11 +48,6 @@ def is_string_like(obj): is_categorical, is_categorical_dtype, is_datetime64_any_dtype, is_string_like) -try: - from pandas.testing import assert_frame_equal, assert_series_equal -except ImportError: - from pandas.util.testing import assert_frame_equal, assert_series_equal - __all__ = ['is_string_dtype', 'is_numeric_dtype', 'is_categorical', 'is_string_like', 'is_categorical_dtype', 'is_datetime64_any_dtype', - 'assert_frame_equal', 'assert_series_equal'] + 'concat'] diff --git a/linearmodels/iv/data.py b/linearmodels/iv/data.py index 12ab3b7cd2..fa67426bea 100644 --- a/linearmodels/iv/data.py +++ b/linearmodels/iv/data.py @@ -8,7 +8,7 @@ from linearmodels.compat.pandas import (is_categorical, is_categorical_dtype, is_numeric_dtype, is_string_dtype, - is_string_like) + is_string_like, concat) dim_err = '{0} has too many dims. Maximum is 2, actual is {1}' type_err = 'Only ndarrays, DataArrays and Series and DataFrames are supported' @@ -25,7 +25,7 @@ def convert_columns(s, drop_first): def expand_categoricals(x, drop_first): if x.shape[1] == 0: return x - return pd.concat([convert_columns(x[c], drop_first) for c in x.columns], axis=1) + return concat([convert_columns(x[c], drop_first) for c in x.columns], axis=1) class IVData(object): diff --git a/linearmodels/panel/data.py b/linearmodels/panel/data.py index cdee7b99f0..d8b92b34de 100644 --- a/linearmodels/panel/data.py +++ b/linearmodels/panel/data.py @@ -8,7 +8,7 @@ from linearmodels.compat.pandas import (is_categorical, is_datetime64_any_dtype, is_numeric_dtype, is_string_dtype, - is_string_like) + is_string_like, concat) from linearmodels.utility import ensure_unique_column, panel_to_frame __all__ = ['PanelData'] @@ -89,7 +89,7 @@ def convert_columns(s, drop_first): def expand_categoricals(x, drop_first): - return pd.concat([convert_columns(x[c], drop_first) for c in x.columns], axis=1) + return concat([convert_columns(x[c], drop_first) for c in x.columns], axis=1) class PanelData(object): diff --git a/linearmodels/tests/asset_pricing/test_formulas.py b/linearmodels/tests/asset_pricing/test_formulas.py index ade736e0b6..3c341fab7b 100644 --- a/linearmodels/tests/asset_pricing/test_formulas.py +++ b/linearmodels/tests/asset_pricing/test_formulas.py @@ -1,11 +1,11 @@ import numpy as np -import pandas as pd import pytest +from pandas.testing import assert_frame_equal from linearmodels.asset_pricing.model import (LinearFactorModel, LinearFactorModelGMM, TradedFactorModel) -from linearmodels.compat.pandas import assert_frame_equal +from linearmodels.compat.pandas import concat from linearmodels.tests.asset_pricing._utility import generate_data FORMULA_FACTORS = 'factor_1 + factor_2 + factor_3' @@ -29,7 +29,7 @@ def non_traded_model(request): def data(request): premia = np.array([.1, .1, .1]) out = generate_data(nportfolio=10, output='pandas', alpha=True, premia=premia) - out['joined'] = pd.concat([out.factors, out.portfolios], 1) + out['joined'] = concat([out.factors, out.portfolios], 1) return out diff --git a/linearmodels/tests/iv/test_data.py b/linearmodels/tests/iv/test_data.py index 146272e716..177340647e 100644 --- a/linearmodels/tests/iv/test_data.py +++ b/linearmodels/tests/iv/test_data.py @@ -12,7 +12,7 @@ except ImportError: MISSING_XARRAY = True -from linearmodels.compat.pandas import assert_frame_equal, assert_series_equal +from pandas.testing import assert_frame_equal, assert_series_equal from linearmodels.iv.data import IVData diff --git a/linearmodels/tests/iv/test_formulas.py b/linearmodels/tests/iv/test_formulas.py index 04379208f8..69f87a7321 100644 --- a/linearmodels/tests/iv/test_formulas.py +++ b/linearmodels/tests/iv/test_formulas.py @@ -1,9 +1,10 @@ import numpy as np -import pandas as pd import pytest from numpy.testing import assert_allclose, assert_equal +from pandas import DataFrame, Categorical +from pandas.testing import assert_frame_equal -from linearmodels.compat.pandas import assert_frame_equal +from linearmodels.compat.pandas import concat from linearmodels.formula import iv_2sls, iv_gmm, iv_gmm_cue, iv_liml from linearmodels.iv import IV2SLS, IVGMM, IVGMMCUE, IVLIML @@ -49,7 +50,7 @@ def data(): y = x @ params + e cols = ['y'] + ['x' + str(i) for i in range(1, 6)] cols += ['z' + str(i) for i in range(1, 4)] - data = pd.DataFrame(np.c_[y, x, z], columns=cols) + data = DataFrame(np.c_[y, x, z], columns=cols) data['Intercept'] = 1.0 data['weights'] = np.random.chisquare(10, size=data.shape[0]) / 10 return data @@ -164,8 +165,8 @@ def test_categorical(model_and_func): y = np.random.randn(1000) x1 = np.random.randn(1000) d = np.random.randint(0, 4, 1000) - d = pd.Categorical(d) - data = pd.DataFrame({'y': y, 'x1': x1, 'd': d}) + d = Categorical(d) + data = DataFrame({'y': y, 'x1': x1, 'd': d}) data['Intercept'] = 1.0 model, func = model_and_func mod = model.from_formula(formula, data) @@ -199,7 +200,7 @@ def test_formula_function(data, model_and_func): dep = data.y exog = [data[['Intercept']], sigmoid(data[['x3']]), data[['x4']], np.exp(data[['x5']])] - exog = pd.concat(exog, 1) + exog = concat(exog, 1) endog = data[['x1', 'x2']] instr = data[['z1', 'z2', 'z3']] mod = model(dep, exog, endog, instr) @@ -220,7 +221,7 @@ def test_predict_formula_function(data, model_and_func): exog = [data[['Intercept']], sigmoid(data[['x3']]), data[['x4']], np.exp(data[['x5']])] - exog = pd.concat(exog, 1) + exog = concat(exog, 1) endog = data[['x1', 'x2']] pred = res.predict(exog, endog) pred2 = res.predict(data=data) diff --git a/linearmodels/tests/iv/test_results.py b/linearmodels/tests/iv/test_results.py index f1e8895d89..0929d1823d 100644 --- a/linearmodels/tests/iv/test_results.py +++ b/linearmodels/tests/iv/test_results.py @@ -1,8 +1,8 @@ -import pandas as pd import pytest from numpy.testing import assert_allclose +from pandas import DataFrame +from pandas.testing import assert_series_equal -from linearmodels.compat.pandas import assert_series_equal from linearmodels.iv.data import IVData from linearmodels.iv.model import IV2SLS, IVGMM, IVGMMCUE, IVLIML from linearmodels.tests.iv._utility import generate_data @@ -65,12 +65,12 @@ def test_fitted_predict(data, model): assert_series_equal(res.idiosyncratic, res.resids) y = mod.dependent.pandas expected = y.values - res.resids.values[:, None] - expected = pd.DataFrame(expected, y.index, ['fitted_values']) + expected = DataFrame(expected, y.index, ['fitted_values']) assert_frame_similar(expected, res.fitted_values) assert_allclose(expected, res.fitted_values) pred = res.predict() nobs = res.resids.shape[0] - assert isinstance(pred, pd.DataFrame) + assert isinstance(pred, DataFrame) assert pred.shape == (nobs, 1) pred = res.predict(idiosyncratic=True, missing=True) nobs = IVData(data.dep).pandas.shape[0] diff --git a/linearmodels/tests/panel/_utility.py b/linearmodels/tests/panel/_utility.py index e8395f8b44..79ecb9e68d 100644 --- a/linearmodels/tests/panel/_utility.py +++ b/linearmodels/tests/panel/_utility.py @@ -1,7 +1,8 @@ import numpy as np -import pandas as pd from numpy.random import standard_normal from numpy.testing import assert_allclose +from pandas import DataFrame, Categorical, get_dummies, date_range +from pandas.testing import assert_frame_equal, assert_series_equal from linearmodels.compat.numpy import lstsq from linearmodels.panel.data import PanelData @@ -13,7 +14,6 @@ MISSING_XARRAY = False except ImportError: MISSING_XARRAY = True -from linearmodels.compat.pandas import assert_frame_equal, assert_series_equal from linearmodels.utility import AttrDict datatypes = ['numpy', 'pandas'] @@ -21,30 +21,30 @@ datatypes += ['xarray'] -def lsdv(y: pd.DataFrame, x: pd.DataFrame, has_const=False, entity=False, time=False, +def lsdv(y: DataFrame, x: DataFrame, has_const=False, entity=False, time=False, general=None): nvar = x.shape[1] temp = x.reset_index() cat_index = temp.index if entity: - cat = pd.Categorical(temp.iloc[:, 0]) + cat = Categorical(temp.iloc[:, 0]) cat.index = cat_index - dummies = pd.get_dummies(cat, drop_first=has_const) - x = pd.DataFrame(np.c_[x.values, dummies.values.astype(np.float64)], + dummies = get_dummies(cat, drop_first=has_const) + x = DataFrame(np.c_[x.values, dummies.values.astype(np.float64)], index=x.index, columns=list(x.columns) + list(dummies.columns)) if time: - cat = pd.Categorical(temp.iloc[:, 1]) + cat = Categorical(temp.iloc[:, 1]) cat.index = cat_index - dummies = pd.get_dummies(cat, drop_first=(has_const or entity)) - x = pd.DataFrame(np.c_[x.values, dummies.values.astype(np.float64)], + dummies = get_dummies(cat, drop_first=(has_const or entity)) + x = DataFrame(np.c_[x.values, dummies.values.astype(np.float64)], index=x.index, columns=list(x.columns) + list(dummies.columns)) if general is not None: - cat = pd.Categorical(general) + cat = Categorical(general) cat.index = cat_index - dummies = pd.get_dummies(cat, drop_first=(has_const or entity or time)) - x = pd.DataFrame(np.c_[x.values, dummies.values.astype(np.float64)], + dummies = get_dummies(cat, drop_first=(has_const or entity or time)) + x = DataFrame(np.c_[x.values, dummies.values.astype(np.float64)], index=x.index, columns=list(x.columns) + list(dummies.columns)) w = np.ones_like(y) @@ -92,9 +92,9 @@ def generate_data(missing, datatype, const=False, ntk=(971, 7, 5), other_effects if datatype in ('pandas', 'xarray'): entities = ['firm' + str(i) for i in range(n)] - time = pd.date_range('1-1-1900', periods=t, freq='A-DEC') + time = date_range('1-1-1900', periods=t, freq='A-DEC') var_names = ['x' + str(i) for i in range(k)] - # y = pd.DataFrame(y, index=time, columns=entities) + # y = DataFrame(y, index=time, columns=entities) y = panel_to_frame(y[None], items=['y'], major_axis=time, minor_axis=entities, swap=True) w = panel_to_frame(w[None], items=['w'], major_axis=time, minor_axis=entities, swap=True) w = w.reindex(y.index) diff --git a/linearmodels/tests/panel/results/generate-panel-data.py b/linearmodels/tests/panel/results/generate-panel-data.py index 9497a0ef3d..a9bac7a490 100644 --- a/linearmodels/tests/panel/results/generate-panel-data.py +++ b/linearmodels/tests/panel/results/generate-panel-data.py @@ -1,6 +1,7 @@ import numpy as np import pandas as pd +from linearmodels.compat.pandas import concat from linearmodels.panel.data import PanelData from linearmodels.utility import panel_to_frame @@ -30,7 +31,7 @@ y = PanelData(y) w = PanelData(w) -z = pd.concat([x.dataframe, y.dataframe, w.dataframe], 1) +z = concat([x.dataframe, y.dataframe, w.dataframe], 1) final_index = pd.MultiIndex.from_product([minor, major]) final_index.levels[0].name = 'firm' z = z.reindex(final_index) diff --git a/linearmodels/tests/panel/test_data.py b/linearmodels/tests/panel/test_data.py index de9a582690..de59f39770 100644 --- a/linearmodels/tests/panel/test_data.py +++ b/linearmodels/tests/panel/test_data.py @@ -1,12 +1,13 @@ from itertools import product import numpy as np -import pandas as pd import pytest from numpy.linalg import pinv from numpy.testing import assert_allclose, assert_equal +from pandas import DataFrame, date_range, Categorical, get_dummies, Series, datetime +from pandas.testing import assert_frame_equal -from linearmodels.compat.pandas import assert_frame_equal, is_string_dtype +from linearmodels.compat.pandas import is_string_dtype try: import xarray as xr @@ -39,7 +40,7 @@ def mi_df(): np.random.seed(12345) n, t, k = 11, 7, 3 x = np.random.standard_normal((k, t, n)) - major = pd.date_range('12-31-1999', periods=7) + major = date_range('12-31-1999', periods=7) items = ['var.{0}'.format(i) for i in range(1, k + 1)] minor = ['entities.{0}'.format(i) for i in range(1, n + 1)] return panel_to_frame(x, items, major, minor, swap=True) @@ -85,7 +86,7 @@ def test_numpy_2d(): def test_pandas_multiindex_dataframe(): n, t, k = 11, 7, 3 x = np.random.random((n, t, k)) - major = pd.date_range('12-31-1999', periods=7) + major = date_range('12-31-1999', periods=7) minor = ['var.{0}'.format(i) for i in range(1, k + 1)] items = ['item.{0}'.format(i) for i in range(1, n + 1)] x = panel_to_frame(x, items=items, major_axis=major, minor_axis=minor, swap=True) @@ -95,16 +96,16 @@ def test_pandas_multiindex_dataframe(): def test_pandas_dataframe(): t, n = 11, 7 x = np.random.random((t, n)) - index = pd.date_range('12-31-1999', periods=t) + index = date_range('12-31-1999', periods=t) cols = ['entity.{0}'.format(i) for i in range(1, n + 1)] - x = pd.DataFrame(x, columns=cols, index=index) + x = DataFrame(x, columns=cols, index=index) PanelData(x) def test_existing_panel_data(): n, t, k = 11, 7, 3 x = np.random.random((k, t, n)) - major = pd.date_range('12-31-1999', periods=7) + major = date_range('12-31-1999', periods=7) items = ['var.{0}'.format(i) for i in range(1, k + 1)] minor = ['entities.{0}'.format(i) for i in range(1, n + 1)] x = panel_to_frame(x, items=items, major_axis=major, minor_axis=minor, swap=True) @@ -170,10 +171,10 @@ def test_missing(mi_df): def test_incorrect_dataframe(): grouped = np.array(list([i] * 10 for i in range(10))).ravel() - df = pd.DataFrame({'a': np.arange(100), - 'b': grouped, - 'c': np.random.permutation(grouped), - 'data': np.random.randn(100)}) + df = DataFrame({'a': np.arange(100), + 'b': grouped, + 'c': np.random.permutation(grouped), + 'data': np.random.randn(100)}) df = df.set_index(['a', 'b', 'c']) with pytest.raises(ValueError): PanelData(df) @@ -250,24 +251,24 @@ def test_demean_against_dummy_regression(data): df = dh.dataframe no_index = df.reset_index() - cat = pd.Categorical(no_index[df.index.levels[0].name]) - d = pd.get_dummies(cat, drop_first=False).astype(np.float64) + cat = Categorical(no_index[df.index.levels[0].name]) + d = get_dummies(cat, drop_first=False).astype(np.float64) dummy_demeaned = df.values - d @ lstsq(d, df.values)[0] entity_demean = dh.demean('entity') assert_allclose(1 + np.abs(entity_demean.values2d), 1 + np.abs(dummy_demeaned)) - cat = pd.Categorical(no_index[df.index.levels[1].name]) - d = pd.get_dummies(cat, drop_first=False).astype(np.float64) + cat = Categorical(no_index[df.index.levels[1].name]) + d = get_dummies(cat, drop_first=False).astype(np.float64) dummy_demeaned = df.values - d @ lstsq(d, df.values)[0] time_demean = dh.demean('time') assert_allclose(1 + np.abs(time_demean.values2d), 1 + np.abs(dummy_demeaned)) - cat = pd.Categorical(no_index[df.index.levels[0].name]) - d1 = pd.get_dummies(cat, drop_first=False).astype(np.float64) - cat = pd.Categorical(no_index[df.index.levels[1].name]) - d2 = pd.get_dummies(cat, drop_first=True).astype(np.float64) + cat = Categorical(no_index[df.index.levels[0].name]) + d1 = get_dummies(cat, drop_first=False).astype(np.float64) + cat = Categorical(no_index[df.index.levels[1].name]) + d2 = get_dummies(cat, drop_first=True).astype(np.float64) d = np.c_[d1.values, d2.values] dummy_demeaned = df.values - d @ lstsq(d, df.values)[0] both_demean = dh.demean('both') @@ -352,7 +353,7 @@ def test_demean_many_missing_dropped(mi_df): def test_demean_both_large_t(): x = np.random.standard_normal((1, 100, 10)) - time = pd.date_range('1-1-2000', periods=100) + time = date_range('1-1-2000', periods=100) entities = ['entity.{0}'.format(i) for i in range(10)] data = panel_to_frame(x, ['x'], time, entities, swap=True) data = PanelData(data) @@ -360,10 +361,10 @@ def test_demean_both_large_t(): df = data.dataframe no_index = df.reset_index() - cat = pd.Categorical(no_index[df.index.levels[0].name]) - d1 = pd.get_dummies(cat, drop_first=False).astype(np.float64) - cat = pd.Categorical(no_index[df.index.levels[1].name]) - d2 = pd.get_dummies(cat, drop_first=True).astype(np.float64) + cat = Categorical(no_index[df.index.levels[0].name]) + d1 = get_dummies(cat, drop_first=False).astype(np.float64) + cat = Categorical(no_index[df.index.levels[1].name]) + d2 = get_dummies(cat, drop_first=True).astype(np.float64) d = np.c_[d1.values, d2.values] dummy_demeaned = df.values - d @ pinv(d) @ df.values assert_allclose(1 + np.abs(demeaned.values2d), @@ -395,7 +396,7 @@ def test_roundtrip_3d(data): x = data.x xpd = PanelData(x) xv = x if isinstance(x, np.ndarray) else x.values - if isinstance(x, pd.DataFrame): + if isinstance(x, DataFrame): assert_equal(xpd.values2d, xv) else: assert_equal(xpd.values3d, xv) @@ -414,7 +415,7 @@ def test_invalid_seires(mi_df): def test_demean_missing_alt_types(data): - check = isinstance(data.x, (pd.DataFrame, np.ndarray)) + check = isinstance(data.x, (DataFrame, np.ndarray)) xpd = PanelData(data.x) xpd.drop(xpd.isnull) entity_demean = xpd.demean('entity') @@ -495,7 +496,7 @@ def test_demean_weighted(data): w.drop(missing) entity_demean = x.demean('entity', weights=w) - d = pd.get_dummies(pd.Categorical(x.index.labels[0])) + d = get_dummies(Categorical(x.index.labels[0])) d = d.values root_w = np.sqrt(w.values2d) wx = root_w * x.values2d @@ -506,7 +507,7 @@ def test_demean_weighted(data): 1 + np.abs(e)) time_demean = x.demean('time', weights=w) - d = pd.get_dummies(pd.Categorical(x.index.labels[1])) + d = get_dummies(Categorical(x.index.labels[1])) d = d.values root_w = np.sqrt(w.values2d) wx = root_w * x.values2d @@ -525,7 +526,7 @@ def test_mean_weighted(data): w.drop(missing) entity_mean = x.mean('entity', weights=w) c = x.index.levels[0][x.index.labels[0]] - d = pd.get_dummies(pd.Categorical(c, ordered=True)) + d = get_dummies(Categorical(c, ordered=True)) d = d[entity_mean.index] d = d.values root_w = np.sqrt(w.values2d) @@ -536,7 +537,7 @@ def test_mean_weighted(data): time_mean = x.mean('time', weights=w) c = x.index.levels[1][x.index.labels[1]] - d = pd.get_dummies(pd.Categorical(c, ordered=True)) + d = get_dummies(Categorical(c, ordered=True)) d = d[time_mean.index] d = d.values root_w = np.sqrt(w.values2d) @@ -550,7 +551,7 @@ def test_categorical_conversion(): t, n = 3, 1000 string = np.random.choice(['a', 'b', 'c'], (t, n)) num = np.random.randn(t, n) - time = pd.date_range('1-1-2000', periods=t) + time = date_range('1-1-2000', periods=t) entities = ['entity.{0}'.format(i) for i in range(n)] p = panel_to_frame(None, items=['a', 'b'], major_axis=time, minor_axis=entities, swap=True) @@ -559,7 +560,7 @@ def test_categorical_conversion(): p = p[['a', 'b']] panel = PanelData(p, convert_dummies=False) df = panel.dataframe.copy() - df['a'] = pd.Categorical(df['a']) + df['a'] = Categorical(df['a']) panel = PanelData(df, convert_dummies=True) df = panel.dataframe @@ -581,7 +582,7 @@ def test_string_conversion(): t, n = 3, 1000 string = np.random.choice(['a', 'b', 'c'], (t, n)) num = np.random.randn(t, n) - time = pd.date_range('1-1-2000', periods=t) + time = date_range('1-1-2000', periods=t) entities = ['entity.{0}'.format(i) for i in range(n)] p = panel_to_frame(None, items=['a', 'b'], major_axis=time, minor_axis=entities, swap=True) @@ -608,7 +609,7 @@ def test_string_nonconversion(): t, n = 3, 1000 string = np.random.choice(['a', 'b', 'c'], (t, n)) num = np.random.randn(t, n) - time = pd.date_range('1-1-2000', periods=t) + time = date_range('1-1-2000', periods=t) entities = ['entity.{0}'.format(i) for i in range(n)] p = panel_to_frame(None, items=['a', 'b'], major_axis=time, minor_axis=entities, swap=True) @@ -628,19 +629,19 @@ def test_repr_html(mi_df): def test_general_demean_oneway(mi_df): y = PanelData(mi_df) dm1 = y.demean('entity') - g = pd.DataFrame(y.entity_ids, index=y.index) + g = DataFrame(y.entity_ids, index=y.index) dm2 = y.general_demean(g) assert_allclose(dm1.values2d, dm2.values2d) dm1 = y.demean('time') - g = pd.DataFrame(y.time_ids, index=y.index) + g = DataFrame(y.time_ids, index=y.index) dm2 = y.general_demean(g) assert_allclose(dm1.values2d, dm2.values2d) - g = pd.DataFrame(np.random.randint(0, 10, g.shape), index=y.index) + g = DataFrame(np.random.randint(0, 10, g.shape), index=y.index) dm2 = y.general_demean(g) - g = pd.Categorical(g.iloc[:, 0]) - d = pd.get_dummies(g) + g = Categorical(g.iloc[:, 0]) + d = get_dummies(g) dm1 = y.values2d - d @ lstsq(d, y.values2d)[0] assert_allclose(dm1, dm2.values2d) @@ -648,17 +649,17 @@ def test_general_demean_oneway(mi_df): def test_general_demean_twoway(mi_df): y = PanelData(mi_df) dm1 = y.demean('both') - g = pd.DataFrame(y.entity_ids, index=y.index) - g['column2'] = pd.Series(y.time_ids.squeeze(), index=y.index) + g = DataFrame(y.entity_ids, index=y.index) + g['column2'] = Series(y.time_ids.squeeze(), index=y.index) dm2 = y.general_demean(g) assert_allclose(dm1.values2d, dm2.values2d) - g = pd.DataFrame(np.random.randint(0, 10, g.shape), index=y.index) + g = DataFrame(np.random.randint(0, 10, g.shape), index=y.index) dm2 = y.general_demean(g) - g1 = pd.Categorical(g.iloc[:, 0]) - d1 = pd.get_dummies(g1) - g2 = pd.Categorical(g.iloc[:, 1]) - d2 = pd.get_dummies(g2, drop_first=True) + g1 = Categorical(g.iloc[:, 0]) + d1 = get_dummies(g1) + g2 = Categorical(g.iloc[:, 1]) + d2 = get_dummies(g2, drop_first=True) d = np.c_[d1, d2] dm1 = y.values2d - d @ lstsq(d, y.values2d)[0] assert_allclose(dm1 - dm2.values2d, np.zeros_like(dm2.values2d), atol=1e-7) @@ -667,7 +668,7 @@ def test_general_demean_twoway(mi_df): def test_general_unit_weighted_demean_oneway(mi_df): y = PanelData(mi_df) dm1 = y.demean('entity') - g = PanelData(pd.DataFrame(y.entity_ids, index=y.index)) + g = PanelData(DataFrame(y.entity_ids, index=y.index)) weights = PanelData(g).copy() weights.dataframe.iloc[:, :] = 1 dm2 = y.general_demean(g, weights) @@ -676,18 +677,18 @@ def test_general_unit_weighted_demean_oneway(mi_df): assert_allclose(dm3.values2d, dm2.values2d) dm1 = y.demean('time') - g = PanelData(pd.DataFrame(y.time_ids, index=y.index)) + g = PanelData(DataFrame(y.time_ids, index=y.index)) dm2 = y.general_demean(g, weights) assert_allclose(dm1.values2d, dm2.values2d) dm3 = y.general_demean(g) assert_allclose(dm3.values2d, dm2.values2d) - g = PanelData(pd.DataFrame(np.random.randint(0, 10, g.dataframe.shape), - index=y.index)) + g = PanelData(DataFrame(np.random.randint(0, 10, g.dataframe.shape), + index=y.index)) dm2 = y.general_demean(g, weights) dm3 = y.general_demean(g) - g = pd.Categorical(g.dataframe.iloc[:, 0]) - d = pd.get_dummies(g) + g = Categorical(g.dataframe.iloc[:, 0]) + d = get_dummies(g) dm1 = y.values2d - d @ lstsq(d, y.values2d)[0] assert_allclose(dm1, dm2.values2d) assert_allclose(dm3.values2d, dm2.values2d) @@ -695,25 +696,25 @@ def test_general_unit_weighted_demean_oneway(mi_df): def test_general_weighted_demean_oneway(mi_df): y = PanelData(mi_df) - weights = pd.DataFrame( + weights = DataFrame( np.random.chisquare(10, (y.dataframe.shape[0], 1)) / 10, index=y.index) w = PanelData(weights) dm1 = y.demean('entity', weights=w) - g = PanelData(pd.DataFrame(y.entity_ids, index=y.index)) + g = PanelData(DataFrame(y.entity_ids, index=y.index)) dm2 = y.general_demean(g, w) assert_allclose(dm1.values2d, dm2.values2d) dm1 = y.demean('time', weights=w) - g = PanelData(pd.DataFrame(y.time_ids, index=y.index)) + g = PanelData(DataFrame(y.time_ids, index=y.index)) dm2 = y.general_demean(g, w) assert_allclose(dm1.values2d, dm2.values2d) - g = PanelData(pd.DataFrame(np.random.randint(0, 10, g.dataframe.shape), - index=y.index)) + g = PanelData(DataFrame(np.random.randint(0, 10, g.dataframe.shape), + index=y.index)) dm2 = y.general_demean(g, w) - g = pd.Categorical(g.dataframe.iloc[:, 0]) - d = pd.get_dummies(g) + g = Categorical(g.dataframe.iloc[:, 0]) + d = get_dummies(g) wd = np.sqrt(w.values2d) * d wy = np.sqrt(w.values2d) * y.values2d dm1 = wy - wd @ lstsq(wd, wy)[0] @@ -723,23 +724,23 @@ def test_general_weighted_demean_oneway(mi_df): def test_general_unit_weighted_demean_twoway(mi_df): np.random.seed(12345) y = PanelData(mi_df) - weights = pd.DataFrame( + weights = DataFrame( np.random.chisquare(10, (y.dataframe.shape[0], 1)) / 10, index=y.index) w = PanelData(weights) dm1 = y.demean('both', weights=w) - g = pd.DataFrame(y.entity_ids, index=y.index) - g['column2'] = pd.Series(y.time_ids.squeeze(), index=y.index) + g = DataFrame(y.entity_ids, index=y.index) + g['column2'] = Series(y.time_ids.squeeze(), index=y.index) dm2 = y.general_demean(g, weights=w) assert_allclose(dm1.values2d - dm2.values2d, np.zeros_like(dm2.values2d), atol=1e-7) - g = pd.DataFrame(np.random.randint(0, 10, g.shape), index=y.index) + g = DataFrame(np.random.randint(0, 10, g.shape), index=y.index) dm2 = y.general_demean(g, weights=w) - g1 = pd.Categorical(g.iloc[:, 0]) - d1 = pd.get_dummies(g1) - g2 = pd.Categorical(g.iloc[:, 1]) - d2 = pd.get_dummies(g2, drop_first=True) + g1 = Categorical(g.iloc[:, 0]) + d1 = get_dummies(g1) + g2 = Categorical(g.iloc[:, 1]) + d2 = get_dummies(g2, drop_first=True) d = np.c_[d1, d2] wd = np.sqrt(w.values2d) * d wy = np.sqrt(w.values2d) * y.values2d @@ -753,7 +754,7 @@ def test_original_unmodified(data): pre_w = data.w.copy() mod = PanelOLS(data.y, data.x, weights=data.w) mod.fit(debiased=True) - if isinstance(data.y, (pd.DataFrame)): + if isinstance(data.y, (DataFrame)): for after, before in ((data.y, pre_y), (data.x, pre_x), (data.w, pre_w)): assert_frame_equal(before, after) @@ -794,7 +795,7 @@ def test_incorrect_time_axis(): var_names = ['var.{0}'.format(i) for i in range(3)] p = panel_to_frame(x, items=var_names, major_axis=time, minor_axis=entities, swap=True) - p.index = p.index.set_levels([1, pd.datetime(1960, 1, 1), 'a'], 1) + p.index = p.index.set_levels([1, datetime(1960, 1, 1), 'a'], 1) with pytest.raises(ValueError): PanelData(p) @@ -820,7 +821,7 @@ def test_incorrect_time_axis_xarray(): def test_named_index(data): pdata = PanelData(data.x) - if isinstance(data.x, pd.DataFrame): + if isinstance(data.x, DataFrame): assert pdata.dataframe.index.levels[0].name == data.x.index.levels[0].name assert pdata.dataframe.index.levels[1].name == data.x.index.levels[1].name diff --git a/linearmodels/tests/panel/test_formula.py b/linearmodels/tests/panel/test_formula.py index 057fd07a22..27bfc497d0 100644 --- a/linearmodels/tests/panel/test_formula.py +++ b/linearmodels/tests/panel/test_formula.py @@ -1,10 +1,10 @@ from itertools import product import numpy as np -import pandas as pd import pytest +from pandas import DataFrame +from pandas.testing import assert_frame_equal -from linearmodels.compat.pandas import assert_frame_equal from linearmodels.formula import (between_ols, first_difference_ols, panel_ols, pooled_ols, random_effects, fama_macbeth) from linearmodels.panel.model import (BetweenOLS, FirstDifferenceOLS, PanelOLS, @@ -52,7 +52,7 @@ def sigmoid(v): def test_basic_formulas(data, models, formula): - if not isinstance(data.y, pd.DataFrame): + if not isinstance(data.y, DataFrame): return joined = data.x joined['y'] = data.y @@ -99,7 +99,7 @@ def test_basic_formulas(data, models, formula): def test_basic_formulas_math_op(data, models, formula): - if not isinstance(data.y, pd.DataFrame): + if not isinstance(data.y, DataFrame): return joined = data.x joined['y'] = data.y @@ -113,7 +113,7 @@ def test_basic_formulas_math_op(data, models, formula): def test_panel_ols_formulas_math_op(data): - if not isinstance(data.y, pd.DataFrame): + if not isinstance(data.y, DataFrame): return joined = data.x joined['y'] = data.y @@ -123,7 +123,7 @@ def test_panel_ols_formulas_math_op(data): def test_panel_ols_formula(data): - if not isinstance(data.y, pd.DataFrame): + if not isinstance(data.y, DataFrame): return joined = data.x joined['y'] = data.y @@ -159,7 +159,7 @@ def test_panel_ols_formula(data): def test_basic_formulas_predict(data, models, formula): - if not isinstance(data.y, pd.DataFrame): + if not isinstance(data.y, DataFrame): return joined = data.x joined['y'] = data.y @@ -204,7 +204,7 @@ def test_basic_formulas_predict(data, models, formula): def test_formulas_predict_error(data, models, formula): - if not isinstance(data.y, pd.DataFrame): + if not isinstance(data.y, DataFrame): return joined = data.x joined['y'] = data.y @@ -226,7 +226,7 @@ def test_formulas_predict_error(data, models, formula): def test_parser(data, formula, effects): - if not isinstance(data.y, pd.DataFrame): + if not isinstance(data.y, DataFrame): return if effects: formula += ' + EntityEffects + TimeEffects' diff --git a/linearmodels/tests/panel/test_results.py b/linearmodels/tests/panel/test_results.py index fd7f972c25..13aeefd880 100644 --- a/linearmodels/tests/panel/test_results.py +++ b/linearmodels/tests/panel/test_results.py @@ -3,8 +3,8 @@ import pytest import statsmodels.api as sm +from pandas.testing import assert_series_equal -from linearmodels.compat.pandas import assert_series_equal from linearmodels.datasets import wage_panel from linearmodels.iv.model import IV2SLS from linearmodels.panel.data import PanelData diff --git a/linearmodels/tests/system/results/execute-stata-3sls.py b/linearmodels/tests/system/results/execute-stata-3sls.py index 83e6a5448c..709efd11fb 100644 --- a/linearmodels/tests/system/results/execute-stata-3sls.py +++ b/linearmodels/tests/system/results/execute-stata-3sls.py @@ -5,7 +5,7 @@ import os import subprocess -import pandas as pd +from linearmodels.compat.pandas import concat from linearmodels.tests.system._utility import generate_simultaneous_data data = generate_simultaneous_data() @@ -21,7 +21,7 @@ else: out.append(vals[col]) all_cols.append(col) -out = pd.concat(out, 1) +out = concat(out, 1) if 'const' in out: out.pop('const') out.to_stata('simulated-3sls.dta', write_index=False) diff --git a/linearmodels/tests/system/results/generate_data.py b/linearmodels/tests/system/results/generate_data.py index be5e4976c1..5514c08641 100644 --- a/linearmodels/tests/system/results/generate_data.py +++ b/linearmodels/tests/system/results/generate_data.py @@ -9,6 +9,7 @@ import numpy as np import pandas as pd +from linearmodels.compat.pandas import concat from linearmodels.tests.system._utility import generate_data basic_data = generate_data(n=200, k=3, p=[2, 3, 4], const=True, seed=0) @@ -45,5 +46,5 @@ out.extend([dep]) if __name__ == '__main__': - df = pd.concat(out, 1) + df = concat(out, 1) df.to_stata('simulated-sur.dta') diff --git a/linearmodels/tests/system/test_3sls.py b/linearmodels/tests/system/test_3sls.py index 5dde79f316..cc6ae1bd45 100644 --- a/linearmodels/tests/system/test_3sls.py +++ b/linearmodels/tests/system/test_3sls.py @@ -1,11 +1,11 @@ from itertools import product import numpy as np -import pandas as pd import pytest from numpy.testing import assert_allclose +from pandas import DataFrame +from pandas.testing import assert_frame_equal, assert_series_equal -from linearmodels.compat.pandas import assert_frame_equal, assert_series_equal from linearmodels.system.model import IV3SLS from linearmodels.tests.system._utility import generate_3sls_data, simple_3sls, \ generate_3sls_data_v2 @@ -183,11 +183,11 @@ def test_multivariate_iv(): n = 250 dep = np.random.standard_normal((n, 2)) exog = np.random.standard_normal((n, 3)) - exog = pd.DataFrame(exog, columns=['exog.{0}'.format(i) for i in range(3)]) + exog = DataFrame(exog, columns=['exog.{0}'.format(i) for i in range(3)]) endog = np.random.standard_normal((n, 2)) - endog = pd.DataFrame(endog, columns=['endog.{0}'.format(i) for i in range(2)]) + endog = DataFrame(endog, columns=['endog.{0}'.format(i) for i in range(2)]) instr = np.random.standard_normal((n, 3)) - instr = pd.DataFrame(instr, columns=['instr.{0}'.format(i) for i in range(3)]) + instr = DataFrame(instr, columns=['instr.{0}'.format(i) for i in range(3)]) eqns = {} for i in range(2): eqns['dependent.{0}'.format(i)] = (dep[:, i], exog, endog, instr) @@ -204,7 +204,7 @@ def test_multivariate_iv_bad_data(): n = 250 dep = np.random.standard_normal((n, 2)) instr = np.random.standard_normal((n, 3)) - instr = pd.DataFrame(instr, columns=['instr.{0}'.format(i) for i in range(3)]) + instr = DataFrame(instr, columns=['instr.{0}'.format(i) for i in range(3)]) with pytest.raises(ValueError): IV3SLS.multivariate_ls(dep, None, None, instr) @@ -224,8 +224,8 @@ def test_fitted(data): expected.append(direct[:, None]) assert_allclose(eq.fitted_values, direct, atol=1e-8) expected = np.concatenate(expected, 1) - expected = pd.DataFrame(expected, index=mod._dependent[i].pandas.index, - columns=[key for key in res.equations]) + expected = DataFrame(expected, index=mod._dependent[i].pandas.index, + columns=[key for key in res.equations]) assert_frame_equal(expected, res.fitted_values) diff --git a/linearmodels/tests/system/test_3sls_against_stata.py b/linearmodels/tests/system/test_3sls_against_stata.py index dff99ee633..4c8eff662b 100644 --- a/linearmodels/tests/system/test_3sls_against_stata.py +++ b/linearmodels/tests/system/test_3sls_against_stata.py @@ -1,7 +1,7 @@ -import pandas as pd import pytest from numpy.testing import assert_allclose +from linearmodels.compat.pandas import concat from linearmodels.system import IV3SLS, SUR from linearmodels.tests.system._utility import generate_simultaneous_data from linearmodels.tests.system.results.parse_stata_3sls_results import results @@ -15,7 +15,7 @@ def fit(request): mod = SUR for key in data: temp = data[key] - temp['exog'] = pd.concat([temp['exog'], temp['endog']], 1) + temp['exog'] = concat([temp['exog'], temp['endog']], 1) del temp['endog'] del temp['instruments'] else: diff --git a/linearmodels/tests/system/test_formulas.py b/linearmodels/tests/system/test_formulas.py index 45a9153ddd..bfe076bd40 100644 --- a/linearmodels/tests/system/test_formulas.py +++ b/linearmodels/tests/system/test_formulas.py @@ -4,9 +4,9 @@ import numpy as np import pytest from pandas import Series, concat +from pandas.testing import assert_series_equal, assert_frame_equal from linearmodels import SUR, IVSystemGMM, IV3SLS -from linearmodels.compat.pandas import assert_series_equal, assert_frame_equal from linearmodels.formula import sur, iv_system_gmm, iv_3sls from linearmodels.system.model import SystemFormulaParser from linearmodels.tests.system._utility import generate_3sls_data_v2 diff --git a/linearmodels/tests/system/test_gmm.py b/linearmodels/tests/system/test_gmm.py index 91257ec16d..78a19d593d 100644 --- a/linearmodels/tests/system/test_gmm.py +++ b/linearmodels/tests/system/test_gmm.py @@ -2,11 +2,12 @@ from itertools import product import numpy as np -import pandas as pd import pytest from numpy.testing import assert_allclose +from pandas import DataFrame, Series +from pandas.testing import assert_frame_equal, assert_series_equal -from linearmodels.compat.pandas import assert_frame_equal, assert_series_equal +from linearmodels.compat.pandas import concat from linearmodels.iv.covariance import kernel_weight_parzen from linearmodels.system import IV3SLS, IVSystemGMM from linearmodels.system.gmm import HeteroskedasticWeightMatrix, HomoskedasticWeightMatrix, \ @@ -100,15 +101,15 @@ def test_formula_equivalence(data): ex = eqn.exog en = eqn.endog instr = eqn.instruments - dep = pd.DataFrame(dep, columns=['dep_{0}'.format(i)]) + dep = DataFrame(dep, columns=['dep_{0}'.format(i)]) has_const = False if np.any(np.all(ex == 1, 0)): ex = ex[:, 1:] has_const = True - ex = pd.DataFrame(ex, columns=['ex_{0}_{1}'.format(i, j) for j in range(ex.shape[1])]) - en = pd.DataFrame(en, columns=['en_{0}_{1}'.format(i, j) for j in range(en.shape[1])]) - instr = pd.DataFrame(instr, columns=['instr_{0}_{1}'.format(i, j) - for j in range(ex.shape[1])]) + ex = DataFrame(ex, columns=['ex_{0}_{1}'.format(i, j) for j in range(ex.shape[1])]) + en = DataFrame(en, columns=['en_{0}_{1}'.format(i, j) for j in range(en.shape[1])]) + instr = DataFrame(instr, columns=['instr_{0}_{1}'.format(i, j) + for j in range(ex.shape[1])]) fmla = ''.join(dep.columns) + ' ~ ' if has_const: fmla += ' 1 + ' @@ -121,7 +122,7 @@ def test_formula_equivalence(data): formulas = OrderedDict() for i, f in enumerate(formula): formulas['eq{0}'.format(i)] = f - df = pd.concat(df, 1) + df = concat(df, 1) formula_mod = IVSystemGMM.from_formula(formulas, df, weight_type='unadjusted') res = mod.fit(cov_type='unadjusted') formula_res = formula_mod.fit(cov_type='unadjusted') @@ -148,15 +149,15 @@ def test_formula_equivalence_weights(data): ex = eqn.exog en = eqn.endog instr = eqn.instruments - dep = pd.DataFrame(dep, columns=['dep_{0}'.format(i)]) + dep = DataFrame(dep, columns=['dep_{0}'.format(i)]) has_const = False if np.any(np.all(ex == 1, 0)): ex = ex[:, 1:] has_const = True - ex = pd.DataFrame(ex, columns=['ex_{0}_{1}'.format(i, j) for j in range(ex.shape[1])]) - en = pd.DataFrame(en, columns=['en_{0}_{1}'.format(i, j) for j in range(en.shape[1])]) - instr = pd.DataFrame(instr, columns=['instr_{0}_{1}'.format(i, j) - for j in range(ex.shape[1])]) + ex = DataFrame(ex, columns=['ex_{0}_{1}'.format(i, j) for j in range(ex.shape[1])]) + en = DataFrame(en, columns=['en_{0}_{1}'.format(i, j) for j in range(en.shape[1])]) + instr = DataFrame(instr, columns=['instr_{0}_{1}'.format(i, j) + for j in range(ex.shape[1])]) fmla = ''.join(dep.columns) + ' ~ ' if has_const: fmla += ' 1 + ' @@ -165,7 +166,7 @@ def test_formula_equivalence_weights(data): fmla += ' + '.join(instr.columns) + ' ] ' formulas[key] = fmla df.extend([dep, ex, en, instr]) - df = pd.concat(df, 1) + df = concat(df, 1) formula_mod = IVSystemGMM.from_formula(formulas, df, weights=weights, weight_type='unadjusted') res = mod.fit(cov_type='unadjusted') formula_res = formula_mod.fit(cov_type='unadjusted') @@ -276,9 +277,9 @@ def test_j_statistic_direct(data): def test_linear_constraint(data): mod = IVSystemGMM(data.eqns, weight_type=data.weight_type) p = mod.param_names - r = pd.DataFrame(np.zeros((1, len(p))), index=[0], columns=p) + r = DataFrame(np.zeros((1, len(p))), index=[0], columns=p) r.iloc[0, 1::6] = 1 - q = pd.Series([6]) + q = Series([6]) mod.add_constraints(r, q) res = mod.fit() @@ -383,6 +384,6 @@ def test_fitted(data): expected.append(direct[:, None]) assert_allclose(eq.fitted_values, direct, atol=1e-8) expected = np.concatenate(expected, 1) - expected = pd.DataFrame(expected, index=mod._dependent[i].pandas.index, - columns=[key for key in res.equations]) + expected = DataFrame(expected, index=mod._dependent[i].pandas.index, + columns=[key for key in res.equations]) assert_frame_equal(expected, res.fitted_values) diff --git a/linearmodels/tests/system/test_sur.py b/linearmodels/tests/system/test_sur.py index 6082b11229..8ba4c954b0 100644 --- a/linearmodels/tests/system/test_sur.py +++ b/linearmodels/tests/system/test_sur.py @@ -7,8 +7,8 @@ import pytest from numpy.testing import assert_allclose from pandas import DataFrame, Series, concat +from pandas.testing import assert_frame_equal, assert_series_equal -from linearmodels.compat.pandas import assert_frame_equal, assert_series_equal from linearmodels.iv.model import _OLS as OLS from linearmodels.system._utility import blocked_column_product, blocked_diag_product, \ inv_matrix_sqrt From 24841cf0d64bb6d7ad480329b3518798449a6f6d Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Wed, 29 Aug 2018 13:55:36 +0100 Subject: [PATCH 3/3] BLD: Update build config to include Python 3.7 Include test config with Python 3.7 Test compat version of concat --- .travis.yml | 8 ++++---- README.rst | 12 ++++++++++-- linearmodels/compat/pandas.py | 4 ++-- linearmodels/tests/test_compat.py | 27 +++++++++++++++++++++++++++ linearmodels/utility.py | 2 +- 5 files changed, 44 insertions(+), 9 deletions(-) create mode 100644 linearmodels/tests/test_compat.py diff --git a/.travis.yml b/.travis.yml index 01b0dffe87..fe6b899cd9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,7 +24,7 @@ env: matrix: fast_finish: true include: - - python: 3.6 + - python: 3.5 env: - PYTHON=3.5 - NUMPY=1.12 @@ -42,15 +42,15 @@ matrix: - python: 3.6 env: - PYTHON=3.6 - - NUMPY=1.15 - - SCIPY=1.1 + - NUMPY=1.14 + - SCIPY=1 - PANDAS=0.22 - XARRAY=0.10 - DOCBUILD=true - STATAMODELS=0.9 - python: 3.6 env: - - PYTHON=3.6 + - PYTHON=3.7 - NUMPY=1.14 - SCIPY=1.1 - PANDAS=0.23 diff --git a/README.rst b/README.rst index 50f7cf3901..d404ce58a6 100644 --- a/README.rst +++ b/README.rst @@ -1,8 +1,7 @@ Linear Models ============= -`Build Status `__ -`codecov `__ +|Build Status| |codecov| |Codacy Badge| |codebeat badge| Linear (regression) models for Python. Extends `statsmodels `__ with Panel regression, @@ -161,3 +160,12 @@ Documentation - nbformat - ipython - jupyter + +.. |Build Status| image:: https://travis-ci.org/bashtage/linearmodels.svg?branch=master + :target: https://travis-ci.org/bashtage/linearmodels +.. |codecov| image:: https://codecov.io/gh/bashtage/linearmodels/branch/master/graph/badge.svg + :target: https://codecov.io/gh/bashtage/linearmodels +.. |Codacy Badge| image:: https://api.codacy.com/project/badge/Grade/c771bce50a164b6fa71c344b374f140d + :target: https://www.codacy.com/app/bashtage/linearmodels?utm_source=github.com&utm_medium=referral&utm_content=bashtage/linearmodels&utm_campaign=Badge_Grade +.. |codebeat badge| image:: https://codebeat.co/badges/aaae2fb4-72b5-4a66-97cd-77b93488f243 + :target: https://codebeat.co/projects/github-com-bashtage-linearmodels-master diff --git a/linearmodels/compat/pandas.py b/linearmodels/compat/pandas.py index 34a029d779..770a9a79ad 100644 --- a/linearmodels/compat/pandas.py +++ b/linearmodels/compat/pandas.py @@ -14,10 +14,10 @@ def concat(*args, **kwargs): if PD_LT_023 and 'sort' in kwargs: kwargs = kwargs.copy() del kwargs['sort'] - else: + elif not PD_LT_023: if 'sort' not in kwargs: kwargs = kwargs.copy() - kwargs['sort'] = True + kwargs['sort'] = False return pd.concat(*args, **kwargs) diff --git a/linearmodels/tests/test_compat.py b/linearmodels/tests/test_compat.py new file mode 100644 index 0000000000..dff6efde40 --- /dev/null +++ b/linearmodels/tests/test_compat.py @@ -0,0 +1,27 @@ +import numpy as np +import pytest +from pandas import DataFrame, Series, date_range + +from linearmodels.compat.pandas import concat +from linearmodels.utility import AttrDict + + +@pytest.fixture('module') +def data(): + idx = date_range('2000-01-01', periods=100) + df1 = DataFrame(np.arange(100)[:, None], columns=['A'], index=idx) + x = np.reshape(np.arange(200), (100, 2)) + df2 = DataFrame(x, columns=['B', 'C'], index=idx[::-1]) + s = Series(300 + np.arange(100), index=idx, name='D') + return AttrDict(df1=df1, df2=df2, s=s) + + +def test_concat_sort(data): + a = concat([data.df1, data.df2], 1) + b = concat([data.df1, data.df2, data.s], 1) + c = concat([data.df1, data.df2, data.s], 1, sort=True) + d = concat([data.df2, data.df1, data.s], 1, sort=False) + assert list(a.columns) == ['A', 'B', 'C'] + assert list(b.columns) == ['A', 'B', 'C', 'D'] + assert list(c.columns) == ['A', 'B', 'C', 'D'] + assert list(d.columns) == ['B', 'C', 'A', 'D'] diff --git a/linearmodels/utility.py b/linearmodels/utility.py index 5b5a4c5a40..246cd596c5 100644 --- a/linearmodels/utility.py +++ b/linearmodels/utility.py @@ -439,7 +439,7 @@ def __init__(self, results): def _get_series_property(self, name): out = ([(k, getattr(v, name)) for k, v in self._results.items()]) cols = [v[0] for v in out] - values = concat([v[1] for v in out], 1, sort=True) + values = concat([v[1] for v in out], 1) values.columns = cols return values