In [21]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import statsmodels.api as sm

In [13]:
# Read the dataset from the CSV file expected in the working directory.
data = pd.read_csv("04_CHL.csv")

# Per-capita series: GDP and steel output, each divided by population.
# NOTE(review): the units of GDP, Pop and Steel are not stated here — confirm
# them before interpreting the per-capita levels.
data = data.assign(
    GDP_pc=data['GDP'] / data['Pop'],
    Prod_pc=data['Steel'] / data['Pop'],
)

# Natural logs of both per-capita series, so the model is linear in logs.
data = data.assign(
    GDP_pc_ln=np.log(data['GDP_pc']),
    Prod_pc_ln=np.log(data['Prod_pc']),
)
data.head()

Unnamed: 0,year,GDP,Pop,Steel,ordinal,GDP_pc,Prod_pc,GDP_pc_ln,Prod_pc_ln
0,1980,75244937.47,11178.81699,809000,1,6731.028653,72.369017,8.814483,4.281778
1,1981,78802559.99,11348.44199,724000,2,6943.910015,63.797304,8.84562,4.155711
2,1982,70661594.54,11527.27403,334000,3,6129.94836,28.97476,8.720942,3.366425
3,1983,67980358.79,11714.44195,462000,4,5803.123963,39.438498,8.666152,3.674742
4,1984,73394748.44,11908.18892,628000,5,6163.38462,52.736819,8.726381,3.965314


In [18]:
# Baseline model without a lagged term, estimated with scikit-learn:
# Prod_pc_ln regressed on ordinal and GDP_pc_ln.
X = data[['ordinal', 'GDP_pc_ln']]
# Double-bracket selection keeps the target as a 2-D, single-column array.
y = data[['Prod_pc_ln']].values
reg = LinearRegression().fit(X, y)
reg

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [19]:
# Show the fitted slope coefficients, then the intercept, one per line.
print(reg.coef_, reg.intercept_, sep="\n")

[[-0.03936284  1.92410272]]
[-12.75725213]


In [24]:
# Re-estimate the no-lag specification with statsmodels to get the full
# regression summary table.
# The regressors are passed as a named DataFrame rather than a bare NumPy
# array, so the summary labels each coefficient by its column name
# (ordinal, GDP_pc_ln) instead of the anonymous x1, x2.
X2 = sm.add_constant(data[['ordinal', 'GDP_pc_ln']])  # adds the intercept column 'const'
y = data['Prod_pc_ln']

# missing='drop' discards any observation with a NaN in y or X2 before fitting.
est = sm.OLS(y, X2, missing='drop')
est2 = est.fit()
print(est2.summary())

                            OLS Regression Results                            
Dep. Variable:             Prod_pc_ln   R-squared:                       0.916
Model:                            OLS   Adj. R-squared:                  0.910
Method:                 Least Squares   F-statistic:                     178.8
Date:                Mon, 24 Jun 2019   Prob (F-statistic):           1.95e-18
Time:                        18:05:07   Log-Likelihood:                 18.366
No. Observations:                  36   AIC:                            -30.73
Df Residuals:                      33   BIC:                            -25.98
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        -12.7573      2.347     -5.436      0.0

In [16]:
# Add the lagged dependent variable: the previous row's log steel output per capita.
# NOTE(review): shift(1) takes the value from the preceding row, so this assumes
# the rows are sorted by year with no gaps — confirm against the CSV.
data['Prod_pc_ln_lag'] = data['Prod_pc_ln'].shift(periods=1)

# Build the estimation sample in its own frame instead of overwriting `data`.
# Reassigning `data = data.dropna()` made this cell non-idempotent: every re-run
# shifted the already-trimmed series and silently dropped one more leading row.
# Only the columns the model uses are checked for NaN, so a gap in an unrelated
# column cannot remove an otherwise usable observation.
lag_model_cols = ['ordinal', 'GDP_pc_ln', 'Prod_pc_ln_lag', 'Prod_pc_ln']
data_lag = data.dropna(subset=lag_model_cols)

# Regression with the lagged dependent variable, estimated with scikit-learn.
Xlag = data_lag[['ordinal', 'GDP_pc_ln', 'Prod_pc_ln_lag']]
y = data_lag['Prod_pc_ln'].values.reshape(-1, 1)  # 2-D, single-column target
reg = LinearRegression()
reg.fit(Xlag, y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [17]:
# Show the fitted slope coefficients, then the intercept, one per line.
print(reg.coef_, reg.intercept_, sep="\n")

[[-0.04258113  2.0741781  -0.07490693]]
[-13.76584471]


In [25]:
# Re-estimate the lagged-dependent-variable specification with statsmodels to
# get the full regression summary table.
# The regressors are passed as a named DataFrame rather than a bare NumPy
# array, so the summary labels each coefficient by its column name
# (ordinal, GDP_pc_ln, Prod_pc_ln_lag) instead of the anonymous x1, x2, x3.
X2 = sm.add_constant(data[['ordinal', 'GDP_pc_ln', 'Prod_pc_ln_lag']])  # adds 'const'
y = data['Prod_pc_ln']

# missing='drop' discards any observation with a NaN in y or X2 before fitting,
# which covers a row whose lag value is undefined.
est = sm.OLS(y, X2, missing='drop')
est2 = est.fit()
print(est2.summary())

                            OLS Regression Results                            
Dep. Variable:             Prod_pc_ln   R-squared:                       0.916
Model:                            OLS   Adj. R-squared:                  0.908
Method:                 Least Squares   F-statistic:                     116.3
Date:                Mon, 24 Jun 2019   Prob (F-statistic):           2.74e-17
Time:                        18:05:32   Log-Likelihood:                 18.458
No. Observations:                  36   AIC:                            -28.92
Df Residuals:                      32   BIC:                            -22.58
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        -13.7658      3.441     -4.001      0.0