# 第6章 大样本OLS

## 6.4 蒙特卡洛模拟中心极限定理

In [None]:
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
from cmath import sqrt

np.random.seed(0)

# Build 10000 sample means, each being the mean of 30 draws from Uniform(0, 1).
def generate_x_i(obs):
    """Return the mean of `obs` independent draws from Uniform(0, 1)."""
    x_i = np.random.uniform(0, 1, obs)
    return x_i.mean()

# Collect the means in a list and convert once: np.append copies the whole
# array on every call, which made the original loop quadratic in the number
# of replications. The draws (and therefore the values) are unchanged.
x_mean = np.array([generate_x_i(30) for _ in range(10000)])

# Draw 10000 values from N(0.5, sd = sqrt(1/360)): the mean of 30 Uniform(0, 1)
# draws has mean 1/2 and variance (1/12)/30 = 1/360.
# np.sqrt is used because the file-level `sqrt` comes from cmath and returns a
# complex number, which is not a valid scale for a normal distribution.
samples = norm.rvs(loc=0.5, scale=np.sqrt(1/360), size=10000)
# Estimate the mean and standard deviation from the simulated samples.
sample_mean = np.mean(samples)
sample_std = np.std(samples)
# Evaluate the fitted normal density on a grid covering the plotted range.
x = np.linspace(0.3, 0.7, 1000)
y = norm.pdf(x, sample_mean, sample_std)

# Plot the density-scaled histogram of the sample means and overlay the
# normal density curve computed on the grid (x, y).
sns.set_theme(color_codes=True)
sns.histplot(data=x_mean, stat='density', kde=False, alpha=0.5)
plt.xlim(left=0.3, right=0.7)
plt.plot(x, y)
plt.show()

## 6.10 大样本OLS和python命令及实例

In [None]:

import pandas as pd
import statsmodels.api as sm
import seaborn as sns
import numpy as np

# Load the nerlove Stata file, then show the first rows together with the
# descriptive statistics as the cell output.
nerlove = pd.read_stata(filepath_or_buffer='../2_Data/Data-2e/nerlove.dta')
(nerlove.head(), nerlove.describe())

In [None]:
# First estimate the regression with the default (non-robust) standard errors.
# The regressor matrix gets a constant column added by sm.add_constant.
y = nerlove['lntc']
X = sm.add_constant(nerlove[['lnq', 'lnpl', 'lnpk', 'lnpf']])
model = sm.OLS(endog=y, exog=X)
results = model.fit()
print(results.summary())

In [None]:
# F-test for the returns-to-scale hypothesis.
# H0: the coefficient at position 1 (lnq, right after the constant) equals 1.
q = 1
R = np.zeros_like(results.params)
R[1] = 1  # select the single coefficient being restricted
r_matrix = (R, q)

F_test = results.f_test(r_matrix=r_matrix)
print(F_test)


In [None]:
# Recompute the standard errors of the fitted regression with
# heteroskedasticity-robust covariance estimators (HC0 through HC3) via
# get_robustcov_results.
robust_results0, robust_results1, robust_results2, robust_results3 = (
    results.get_robustcov_results(cov_type=hc_type)
    for hc_type in ('HC0', 'HC1', 'HC2', 'HC3')
)

# HC0 is the White (1980) estimator.
# Original author's note: comparing the four variants, the HC1 figures match
# the textbook, which suggests that Stata's robust option corresponds to HC1.

print(robust_results0.summary())

In [None]:
# Print the regression summary for the HC1 robust-covariance results.
print(robust_results1.summary())

In [None]:
# Print the regression summary for the HC2 robust-covariance results.
print(robust_results2.summary())

In [None]:
# Print the regression summary for the HC3 robust-covariance results.
print(robust_results3.summary())

In [None]:
# Set up the null hypothesis: the coefficient at position 1 equals 1,
# then test it using the HC1 robust results.
q = 1
R = np.zeros_like(results.params)
R[1] = 1
r_matrix = (R, q)

F_test = robust_results1.f_test(r_matrix=r_matrix)
print(F_test)

## 6.11 大样本理论的蒙特卡罗模拟

DGP：

$y = \alpha + \beta x + \epsilon$

$x \sim \chi^2(1)$

$\epsilon \sim \chi^2(10)-10$

In [None]:
from scipy.stats import chi2
import pandas as pd
import seaborn as sns

# Simulate a shifted chi-square series.
def chi2_generator(df, size, c, random_state=None):
    """Draw `size` chi-square variates with `df` degrees of freedom, shifted by `c`.

    Parameters
    ----------
    df : degrees of freedom passed to scipy.stats.chi2.rvs.
    size : number (or shape) of draws.
    c : constant added to every draw (e.g. -df to centre the series at zero).
    random_state : optional seed or generator forwarded to chi2.rvs; the
        default None keeps the original behaviour of using NumPy's global
        random state.

    Returns
    -------
    numpy.ndarray holding the shifted draws.
    """
    # The original pre-assigned an empty array that was immediately
    # overwritten; that dead assignment is dropped here.
    return chi2.rvs(df=df, size=size, random_state=random_state) + c

def monte_carlo_test(size,times):
    """Repeat the simulated regression `times` times and collect the slope estimates.

    Each replication draws x from chi-square(1) and the disturbance from
    chi-square(10) shifted by -10, builds y = 1 + 2*x + disturbance with
    `size` observations, fits OLS of y on a constant and x, and records the
    estimated coefficient at position 1 (the slope).

    Returns a pandas Series with one slope estimate per replication.
    """
    slope_estimates = []
    for _ in range(times):
        regressor = chi2_generator(df=1, size=size, c=0)
        disturbance = chi2_generator(df=10, size=size, c=-10)
        response = 1 + 2*regressor + disturbance
        fitted = sm.OLS(response, sm.add_constant(regressor)).fit()
        slope_estimates.append(fitted.params[1])
    return pd.Series(slope_estimates)

In [None]:
# Sample size 20, 10000 replications: summarise the simulated slope
# estimates and plot their histogram with a KDE overlay.
beta1 = monte_carlo_test(size=20, times=10000)
print(beta1.describe())
sns.histplot(data=beta1, kde=True, bins=50)

In [None]:
# Sample size 100, 10000 replications: summarise the simulated slope
# estimates and plot their histogram with a KDE overlay.
beta2 = monte_carlo_test(size=100, times=10000)
print(beta2.describe())
sns.histplot(data=beta2, kde=True, bins=50)

In [None]:
# Sample size 1000, 10000 replications: summarise the simulated slope
# estimates and plot their histogram with a KDE overlay.
beta3 = monte_carlo_test(size=1000, times=10000)
print(beta3.describe())
sns.histplot(data=beta3, kde=True, bins=50)

## 习题 6.5

In [8]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

# Read the data, then split it on the rns indicator:
# rows with rns == 1 go to grilic_south, rows with rns == 0 to grilic_north.
grilic = pd.read_stata('../2_Data/Data-2e/grilic.dta')
grilic_south = grilic.loc[grilic['rns'] == 1]
grilic_north = grilic.loc[grilic['rns'] == 0]


def grilic_OLS(data_set, robust = False):
    """Regress lnw on s, expr, tenure and smsa (plus a constant) by OLS.

    Parameters
    ----------
    data_set : DataFrame containing the columns lnw, s, expr, tenure, smsa.
    robust : when True, return the results re-computed with the HC1
        robust covariance; otherwise return the plain OLS results.
    """
    regressors = sm.add_constant(data_set[['s','expr','tenure','smsa']])
    fitted = sm.OLS(data_set['lnw'], regressors).fit()
    if not robust:
        return fitted
    return fitted.get_robustcov_results(cov_type='HC1')

In [10]:
# Full sample, HC1 robust standard errors.
res_all = grilic_OLS(data_set=grilic, robust=True)
print(res_all.summary())

                            OLS Regression Results                            
Dep. Variable:                    lnw   R-squared:                       0.345
Model:                            OLS   Adj. R-squared:                  0.341
Method:                 Least Squares   F-statistic:                     98.36
Date:                Mon, 15 Apr 2024   Prob (F-statistic):           2.42e-67
Time:                        19:56:10   Log-Likelihood:                -273.23
No. Observations:                 758   AIC:                             556.5
Df Residuals:                     753   BIC:                             579.6
Df Model:                           4                                         
Covariance Type:                  HC1                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          4.0591      0.086     47.142      0.0

In [11]:
# rns == 1 subsample, HC1 robust standard errors.
res_south = grilic_OLS(data_set=grilic_south, robust=True)
print(res_south.summary())

                            OLS Regression Results                            
Dep. Variable:                    lnw   R-squared:                       0.420
Model:                            OLS   Adj. R-squared:                  0.409
Method:                 Least Squares   F-statistic:                     36.04
Date:                Mon, 15 Apr 2024   Prob (F-statistic):           1.21e-22
Time:                        19:56:47   Log-Likelihood:                -72.353
No. Observations:                 204   AIC:                             154.7
Df Residuals:                     199   BIC:                             171.3
Df Model:                           4                                         
Covariance Type:                  HC1                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          3.8061      0.158     24.046      0.0

In [12]:
# rns == 0 subsample, HC1 robust standard errors.
res_north = grilic_OLS(data_set=grilic_north, robust=True)
print(res_north.summary())

                            OLS Regression Results                            
Dep. Variable:                    lnw   R-squared:                       0.313
Model:                            OLS   Adj. R-squared:                  0.308
Method:                 Least Squares   F-statistic:                     59.45
Date:                Mon, 15 Apr 2024   Prob (F-statistic):           1.05e-41
Time:                        19:57:11   Log-Likelihood:                -191.70
No. Observations:                 554   AIC:                             393.4
Df Residuals:                     549   BIC:                             415.0
Df Model:                           4                                         
Covariance Type:                  HC1                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          4.2140      0.103     40.736      0.0

## 习题 6.6

In [17]:
# Load the housing-price data and preview the first rows.
hedonic = pd.read_stata('../2_Data/Data-2e/hprice2a.dta')
# A bare `hedonic.head()` here is not the last expression of the cell, so the
# notebook would discard it; print the preview explicitly instead.
print(hedonic.head())

def hedonic_OLS(data_set, robust = False):
    """Regress lprice on lnox, ldist, rooms and stratio (plus a constant) by OLS.

    Parameters
    ----------
    data_set : DataFrame containing the columns lprice, lnox, ldist, rooms, stratio.
    robust : when True, return the results re-computed with the HC1
        robust covariance; otherwise return the plain OLS results.
    """
    regressors = sm.add_constant(data_set[['lnox','ldist','rooms','stratio']])
    fitted = sm.OLS(data_set['lprice'], regressors).fit()
    if not robust:
        return fitted
    return fitted.get_robustcov_results(cov_type='HC1')
# hedonic.describe()


In [18]:
# Hedonic price regression with the default (non-robust) standard errors.
res_all = hedonic_OLS(data_set=hedonic)
print(res_all.summary())

                            OLS Regression Results                            
Dep. Variable:                 lprice   R-squared:                       0.584
Model:                            OLS   Adj. R-squared:                  0.581
Method:                 Least Squares   F-statistic:                     175.9
Date:                Mon, 15 Apr 2024   Prob (F-statistic):           5.53e-94
Time:                        20:08:07   Log-Likelihood:                -43.495
No. Observations:                 506   AIC:                             96.99
Df Residuals:                     501   BIC:                             118.1
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         11.0839      0.318     34.843      0.0

In [19]:
# Same regression with HC1 robust standard errors.
res_robust = hedonic_OLS(data_set=hedonic, robust=True)
print(res_robust.summary())

                            OLS Regression Results                            
Dep. Variable:                 lprice   R-squared:                       0.584
Model:                            OLS   Adj. R-squared:                  0.581
Method:                 Least Squares   F-statistic:                     146.3
Date:                Mon, 15 Apr 2024   Prob (F-statistic):           9.09e-83
Time:                        20:10:25   Log-Likelihood:                -43.495
No. Observations:                 506   AIC:                             96.99
Df Residuals:                     501   BIC:                             118.1
Df Model:                           4                                         
Covariance Type:                  HC1                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         11.0839      0.377     29.377      0.0

In [21]:
# t-test of H0: b3 = b5, i.e. the coefficient at position 2 minus the
# coefficient at position 4 equals zero (positions count the constant as 0).
q = 0
R = np.zeros_like(res_robust.params)
R[[2, 4]] = (1, -1)
r_matrix = (R, q)
res_robust_t = res_robust.t_test(r_matrix=r_matrix)
print(res_robust_t)

# The recorded output reports p = 0.132, so H0 is NOT rejected at the 5% level
# (the earlier note claiming rejection did not match the output).

                             Test for Constraints                             
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
c0            -0.0819      0.054     -1.508      0.132      -0.189       0.025


In [22]:
# t-test of H0: b4 = 0.31, where b4 is the coefficient at position 3
# (positions count the constant as 0).
q = 0.31
R = np.zeros_like(res_robust.params)
R[3] = 1
r_matrix = (R, q)
res_robust_t = res_robust.t_test(r_matrix=r_matrix)
print(res_robust_t)


                             Test for Constraints                             
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
c0             0.2545      0.025     -2.244      0.025       0.206       0.303


In [23]:
# t-test of H0: b4 = 0.3, where b4 is the coefficient at position 3
# (positions count the constant as 0).
q = 0.3
R = np.zeros_like(res_robust.params)
R[3] = 1
r_matrix = (R, q)
res_robust_t = res_robust.t_test(r_matrix=r_matrix)
print(res_robust_t)

                             Test for Constraints                             
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
c0             0.2545      0.025     -1.839      0.066       0.206       0.303
