%pip install numpy pandas pandas_datareader patsy statsmodels wooldridge -q
Note: you may need to restart the kernel to use updated packages.
import numpy as np  # noqa: needed for np.log() inside formula strings
import pandas as pd
import patsy as pt
import statsmodels.api as sm
import statsmodels.formula.api as smf
import wooldridge as wool
12.1 Testing for Serial Correlation of the Error Term
Example 12.2: Testing for AR(1) Serial Correlation
phillips = wool.data("phillips")
T = len(phillips)
# define yearly time series beginning in 1948:
date_range = pd.date_range(start="1948", periods=T, freq="YE")
phillips.index = date_range.year
# estimation of static Phillips curve:
yt96 = phillips["year"] <= 1996
reg_s = smf.ols(formula='Q("inf") ~ unem', data=phillips, subset=yt96)
results_s = reg_s.fit()
# residuals and AR(1) test:
phillips["resid_s"] = results_s.resid
phillips["resid_s_lag1"] = phillips["resid_s"].shift(1)
reg = smf.ols(formula="resid_s ~ resid_s_lag1", data=phillips, subset=yt96)
results = reg.fit()
# print regression table:
table = pd.DataFrame(
{
"b": round(results.params, 4),
"se": round(results.bse, 4),
"t": round(results.tvalues, 4),
"pval": round(results.pvalues, 4),
},
)
print(f"table: \n{table}\n")
table:
b se t pval
Intercept -0.1134 0.3594 -0.3155 0.7538
resid_s_lag1 0.5730 0.1161 4.9337 0.0000
phillips = wool.data("phillips")
T = len(phillips)
# define yearly time series beginning in 1948:
date_range = pd.date_range(start="1948", periods=T, freq="YE")
phillips.index = date_range.year
# estimation of expectations-augmented Phillips curve:
yt96 = phillips["year"] <= 1996
phillips["inf_diff1"] = phillips["inf"].diff()
reg_ea = smf.ols(formula="inf_diff1 ~ unem", data=phillips, subset=yt96)
results_ea = reg_ea.fit()
phillips["resid_ea"] = results_ea.resid
phillips["resid_ea_lag1"] = phillips["resid_ea"].shift(1)
reg = smf.ols(formula="resid_ea ~ resid_ea_lag1", data=phillips, subset=yt96)
results = reg.fit()
# print regression table:
table = pd.DataFrame(
{
"b": round(results.params, 4),
"se": round(results.bse, 4),
"t": round(results.tvalues, 4),
"pval": round(results.pvalues, 4),
},
)
print(f"table: \n{table}\n")
table:
b se t pval
Intercept 0.1942 0.3004 0.6464 0.5213
resid_ea_lag1 -0.0356 0.1239 -0.2873 0.7752
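If the regressors are not strictly exogenous, the AR(1) test is usually run with the original regressors included in the residual regression. A minimal sketch of this variant for the expectations-augmented curve, reusing resid_ea and resid_ea_lag1 from above (the names reg_x and results_x are ours):
# AR(1) test including the original regressor unem:
reg_x = smf.ols(formula="resid_ea ~ resid_ea_lag1 + unem", data=phillips, subset=yt96)
results_x = reg_x.fit()
print(f"t statistic of resid_ea_lag1: {results_x.tvalues['resid_ea_lag1']:.4f}\n")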
Example 12.4: Testing for AR(3) Serial Correlation
barium = wool.data("barium")
T = len(barium)
# monthly time series starting Feb. 1978:
barium.index = pd.date_range(start="1978-02", periods=T, freq="ME")
reg = smf.ols(
formula="np.log(chnimp) ~ np.log(chempi) + np.log(gas) +"
"np.log(rtwex) + befile6 + affile6 + afdec6",
data=barium,
)
results = reg.fit()
# automatic test:
bg_result = sm.stats.diagnostic.acorr_breusch_godfrey(results, nlags=3)
fstat_auto = bg_result[2]
fpval_auto = bg_result[3]
print(f"fstat_auto: {fstat_auto}\n")
print(f"fpval_auto: {fpval_auto}\n")
fstat_auto: 5.124662239772472
fpval_auto: 0.0022637197671316776
# pedestrian test:
barium["resid"] = results.resid
barium["resid_lag1"] = barium["resid"].shift(1)
barium["resid_lag2"] = barium["resid"].shift(2)
barium["resid_lag3"] = barium["resid"].shift(3)
reg_manual = smf.ols(
formula="resid ~ resid_lag1 + resid_lag2 + resid_lag3 +"
"np.log(chempi) + np.log(gas) + np.log(rtwex) +"
"befile6 + affile6 + afdec6",
data=barium,
)
results_manual = reg_manual.fit()
hypotheses = ["resid_lag1 = 0", "resid_lag2 = 0", "resid_lag3 = 0"]
ftest_manual = results_manual.f_test(hypotheses)
fstat_manual = ftest_manual.statistic
fpval_manual = ftest_manual.pvalue
print(f"fstat_manual: {fstat_manual}\n")
print(f"fpval_manual: {fpval_manual}\n")
fstat_manual: 5.122907054069379
fpval_manual: 0.002289802832966284
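The small gap between the automatic and the manual F statistic presumably comes from how the pre-sample residuals are treated (the manual regression simply loses the first three observations). The same acorr_breusch_godfrey call also returns the LM form of the test in its first two entries:
# LM version of the Breusch-Godfrey test (chi-squared with 3 df):
lm_stat = bg_result[0]
lm_pval = bg_result[1]
print(f"lm_stat: {lm_stat}\n")
print(f"lm_pval: {lm_pval}\n")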
phillips = wool.data("phillips")
T = len(phillips)
# define yearly time series beginning in 1948:
date_range = pd.date_range(start="1948", periods=T, freq="YE")
phillips.index = date_range.year
# estimation of both Phillips curve models:
yt96 = phillips["year"] <= 1996
phillips["inf_diff1"] = phillips["inf"].diff()
reg_s = smf.ols(formula='Q("inf") ~ unem', data=phillips, subset=yt96)
reg_ea = smf.ols(formula="inf_diff1 ~ unem", data=phillips, subset=yt96)
results_s = reg_s.fit()
results_ea = reg_ea.fit()
# DW tests:
DW_s = sm.stats.stattools.durbin_watson(results_s.resid)
DW_ea = sm.stats.stattools.durbin_watson(results_ea.resid)
print(f"DW_s: {DW_s}\n")
print(f"DW_ea: {DW_ea}\n")
DW_s: 0.802700467848626
DW_ea: 1.7696478574549563
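As a quick check, the DW statistic can be reproduced from its definition, DW = sum_t (e_t - e_(t-1))^2 / sum_t e_t^2, using the residuals of the static model (a minimal sketch):
# DW statistic for the static model computed from its definition:
resid_s = results_s.resid
DW_s_manual = np.sum(np.diff(resid_s) ** 2) / np.sum(resid_s**2)
print(f"DW_s_manual: {DW_s_manual}\n")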
barium = wool.data("barium")
T = len(barium)
# monthly time series starting Feb. 1978:
barium.index = pd.date_range(start="1978-02", periods=T, freq="ME")
# perform the Cochrane-Orcutt estimation (iterative procedure):
y, X = pt.dmatrices(
"np.log(chnimp) ~ np.log(chempi) + np.log(gas) +"
"np.log(rtwex) + befile6 + affile6 + afdec6",
data=barium,
return_type="dataframe",
)
reg = sm.GLSAR(y, X)
CORC_results = reg.iterative_fit(maxiter=100)
table = pd.DataFrame({"b_CORC": CORC_results.params, "se_CORC": CORC_results.bse})
print(f"reg.rho: {reg.rho}\n")
print(f"table: \n{table}\n")
reg.rho: [0.29585313]
table:
b_CORC se_CORC
Intercept -37.512978 23.239015
np.log(chempi) 2.945448 0.647696
np.log(gas) 1.063321 0.991558
np.log(rtwex) 1.138404 0.514910
befile6 -0.017314 0.321390
affile6 -0.033108 0.323806
afdec6 -0.577328 0.344075
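For comparison, plain OLS on the same y and X shows how much the iterative AR(1) correction shifts the point estimates (a minimal sketch reusing the design matrices built above):
# plain OLS on the same design matrices (no correction for AR(1) errors):
OLS_results = sm.OLS(y, X).fit()
table_comp = pd.DataFrame({"b_OLS": OLS_results.params, "b_CORC": CORC_results.params})
print(f"table_comp: \n{table_comp}\n")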
prminwge = wool.data("prminwge")
T = len(prminwge)
# linear time trend (t = 1 in 1950):
prminwge["time"] = prminwge["year"] - 1949
# define yearly time series beginning in 1950:
prminwge.index = pd.date_range(start="1950", periods=T, freq="YE").year
# OLS regression:
reg = smf.ols(
formula="np.log(prepop) ~ np.log(mincov) + np.log(prgnp) + np.log(usgnp) + time",
data=prminwge,
)
# results with regular SE:
results_regu = reg.fit()
# print regression table:
table_regu = pd.DataFrame(
{
"b": round(results_regu.params, 4),
"se": round(results_regu.bse, 4),
"t": round(results_regu.tvalues, 4),
"pval": round(results_regu.pvalues, 4),
},
)
print(f"table_regu: \n{table_regu}\n")
table_regu:
b se t pval
Intercept -6.6634 1.2578 -5.2976 0.0000
np.log(mincov) -0.2123 0.0402 -5.2864 0.0000
np.log(prgnp) 0.2852 0.0805 3.5437 0.0012
np.log(usgnp) 0.4860 0.2220 2.1896 0.0357
time -0.0267 0.0046 -5.7629 0.0000
# results with HAC SE:
results_hac = reg.fit(cov_type="HAC", cov_kwds={"maxlags": 2})
# print regression table:
table_hac = pd.DataFrame(
{
"b": round(results_hac.params, 4),
"se": round(results_hac.bse, 4),
"t": round(results_hac.tvalues, 4),
"pval": round(results_hac.pvalues, 4),
},
)
print(f"table_hac: \n{table_hac}\n")
table_hac:
b se t pval
Intercept -6.6634 1.4318 -4.6539 0.0000
np.log(mincov) -0.2123 0.0426 -4.9821 0.0000
np.log(prgnp) 0.2852 0.0928 3.0720 0.0021
np.log(usgnp) 0.4860 0.2601 1.8687 0.0617
time -0.0267 0.0054 -4.9710 0.0000
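The HAC (Newey-West) standard errors depend on the chosen truncation lag, so it is worth checking how sensitive they are; a minimal sketch recomputing the standard error of np.log(usgnp) for a few arbitrary maxlags values:
# HAC standard errors for different (arbitrary) truncation lags:
for m in [1, 2, 4, 8]:
    results_m = reg.fit(cov_type="HAC", cov_kwds={"maxlags": m})
    print(f"maxlags={m}: se(np.log(usgnp)) = {results_m.bse['np.log(usgnp)']:.4f}")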
nyse = wool.data("nyse")
# rename 'return' (a Python keyword, so it cannot appear directly in a formula) and create its lag:
nyse["ret"] = nyse["return"]
nyse["ret_lag1"] = nyse["ret"].shift(1)
# AR(1) regression for NYSE stock returns:
reg = smf.ols(formula="ret ~ ret_lag1", data=nyse)
results = reg.fit()
# squared residuals:
nyse["resid_sq"] = results.resid**2
nyse["resid_sq_lag1"] = nyse["resid_sq"].shift(1)
# ARCH(1) test: regress squared residuals on their lag:
ARCHreg = smf.ols(formula="resid_sq ~ resid_sq_lag1", data=nyse)
results_ARCH = ARCHreg.fit()
# print regression table:
table = pd.DataFrame(
{
"b": round(results_ARCH.params, 4),
"se": round(results_ARCH.bse, 4),
"t": round(results_ARCH.tvalues, 4),
"pval": round(results_ARCH.pvalues, 4),
},
)
print(f"table: \n{table}\n")
table:
b se t pval
Intercept 2.9474 0.4402 6.6951 0.0
resid_sq_lag1 0.3371 0.0359 9.3767 0.0
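statsmodels also provides an automated ARCH-LM test; assuming the current het_arch signature (it returns the LM statistic and p-value followed by an F variant), a one-lag test on the residuals of the return equation should point to the same conclusion:
# automated ARCH-LM test with one lag on the residuals of the return equation:
arch_lm, arch_lm_pval, arch_f, arch_f_pval = sm.stats.diagnostic.het_arch(results.resid, nlags=1)
print(f"arch_lm: {arch_lm}\n")
print(f"arch_lm_pval: {arch_lm_pval}\n")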