In [52]:
import pandas as pd
import numpy as np
import io
import statsmodels.formula.api as smf
import scipy.stats as stats
import re

# 1 F-тест: значимость регрессии

$$
F_{nabl} = \frac{R^2}{1-R^2} * \frac{n-m}{m-1}
$$

$$
F_{crit} (\alpha, k, n-k-1), \quad k = m-1
$$

## 1.1 sleep equation #1

In [53]:
some_string1 = '''Observations     706         706         706         706    
R2              0.132       0.131       0.128       0.007   
Adjusted_R2     0.121       0.123       0.121       0.002'''

In [54]:
# Parse the pasted summary table: the first whitespace-separated token of each
# row becomes the index label, the remaining tokens are the per-model values.
# The separator is a raw string so that `\s` is a regex class instead of an
# invalid Python string escape.
df = pd.read_csv(io.StringIO(some_string1), sep=r'\s+', header=None, index_col=0)
df

Unnamed: 0_level_0,1,2,3,4
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Observations,706.0,706.0,706.0,706.0
R2,0.132,0.131,0.128,0.007
Adjusted_R2,0.121,0.123,0.121,0.002


In [55]:
# Flip the table so that each model is a row and each statistic is a column.
# Re-running this cell on its own flips the table back, so run it once after parsing.
df = df.T
df

Unnamed: 0,Observations,R2,Adjusted_R2
1,706.0,0.132,0.121
2,706.0,0.131,0.123
3,706.0,0.128,0.121
4,706.0,0.007,0.002


In [56]:
# Per-model value of m, used by the F statistic below as (n - m) / (m - 1);
# presumably the number of estimated coefficients including the intercept.
df = df.assign(m=[10, 8, 6, 5])
df

Unnamed: 0,Observations,R2,Adjusted_R2,m
1,706.0,0.132,0.121,10
2,706.0,0.131,0.123,8
3,706.0,0.128,0.121,6
4,706.0,0.007,0.002,5


In [57]:
# Observed F statistic of overall significance, row by row:
# R^2 / (1 - R^2) * (n - m) / (m - 1).
df['F_nabl'] = df['R2'].div(1 - df['R2']).mul(
    (df['Observations'] - df['m']).div(df['m'] - 1)
)
df.round(2)

Unnamed: 0,Observations,R2,Adjusted_R2,m,F_nabl
1,706.0,0.13,0.12,10,11.76
2,706.0,0.13,0.12,8,15.03
3,706.0,0.13,0.12,6,20.55
4,706.0,0.01,0.0,5,1.24


In [58]:
alpha = 0.05
# Critical value: the (1 - alpha) quantile of F(m - 1, n - m) for every model.
df['F_crit'] = stats.f.ppf(
    1 - alpha,
    dfn=df['m'] - 1,
    dfd=df['Observations'] - df['m'],
)
df.round(2)

Unnamed: 0,Observations,R2,Adjusted_R2,m,F_nabl,F_crit
1,706.0,0.13,0.12,10,11.76,1.89
2,706.0,0.13,0.12,8,15.03,2.02
3,706.0,0.13,0.12,6,20.55,2.23
4,706.0,0.01,0.0,5,1.24,2.38


In [59]:
# Label each model: significant when the observed F exceeds the critical F.
df['znachimost'] = (df['F_nabl'] > df['F_crit']).map(
    {True: 'znachimo', False: 'ne znachimo'}
)
df

Unnamed: 0,Observations,R2,Adjusted_R2,m,F_nabl,F_crit,znachimost
1,706.0,0.132,0.121,10,11.760369,1.893317,znachimo
2,706.0,0.131,0.123,8,15.031728,2.022681,znachimo
3,706.0,0.128,0.121,6,20.550459,2.226901,znachimo
4,706.0,0.007,0.002,5,1.235398,2.384638,ne znachimo


## 1.2 cost equation #1

In [60]:
some_string2 = '''Observations    158       158       158       158      158  
R2             0.982     0.965     0.982     0.053    0.026 
Adjusted_R2    0.982     0.965     0.982     0.035    0.020 '''

In [61]:
# Parse the pasted summary table: the first whitespace-separated token of each
# row becomes the index label, the remaining tokens are the per-model values.
# The separator is a raw string so that `\s` is a regex class instead of an
# invalid Python string escape.
df = pd.read_csv(io.StringIO(some_string2), sep=r'\s+', header=None, index_col=0)
df

Unnamed: 0_level_0,1,2,3,4,5
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Observations,158.0,158.0,158.0,158.0,158.0
R2,0.982,0.965,0.982,0.053,0.026
Adjusted_R2,0.982,0.965,0.982,0.035,0.02


In [62]:
# Flip the table so that each model is a row and each statistic is a column.
# Re-running this cell on its own flips the table back, so run it once after parsing.
df = df.T
df

Unnamed: 0,Observations,R2,Adjusted_R2
1,158.0,0.982,0.982
2,158.0,0.965,0.965
3,158.0,0.982,0.982
4,158.0,0.053,0.035
5,158.0,0.026,0.02


In [63]:
# Per-model value of m, used by the F statistic below as (n - m) / (m - 1);
# presumably the number of estimated coefficients including the intercept.
df = df.assign(m=[5, 2, 4, 4, 2])
df

Unnamed: 0,Observations,R2,Adjusted_R2,m
1,158.0,0.982,0.982,5
2,158.0,0.965,0.965,2
3,158.0,0.982,0.982,4
4,158.0,0.053,0.035,4
5,158.0,0.026,0.02,2


In [64]:
# Observed F statistic of overall significance, row by row:
# R^2 / (1 - R^2) * (n - m) / (m - 1).
df['F_nabl'] = df['R2'].div(1 - df['R2']).mul(
    (df['Observations'] - df['m']).div(df['m'] - 1)
)
df.round(2)

Unnamed: 0,Observations,R2,Adjusted_R2,m,F_nabl
1,158.0,0.98,0.98,5,2086.75
2,158.0,0.96,0.96,2,4301.14
3,158.0,0.98,0.98,4,2800.52
4,158.0,0.05,0.04,4,2.87
5,158.0,0.03,0.02,2,4.16


In [65]:
alpha = 0.01
# Critical value: the (1 - alpha) quantile of F(m - 1, n - m) for every model.
df['F_crit'] = stats.f.ppf(
    1 - alpha,
    dfn=df['m'] - 1,
    dfd=df['Observations'] - df['m'],
)
df.round(2)

Unnamed: 0,Observations,R2,Adjusted_R2,m,F_nabl,F_crit
1,158.0,0.98,0.98,5,2086.75,3.44
2,158.0,0.96,0.96,2,4301.14,6.8
3,158.0,0.98,0.98,4,2800.52,3.91
4,158.0,0.05,0.04,4,2.87,3.91
5,158.0,0.03,0.02,2,4.16,6.8


In [66]:
# Label each model: significant when the observed F exceeds the critical F.
df['znachimost'] = (df['F_nabl'] > df['F_crit']).map(
    {True: 'znachimo', False: 'ne znachimo'}
)
df

Unnamed: 0,Observations,R2,Adjusted_R2,m,F_nabl,F_crit,znachimost
1,158.0,0.982,0.982,5,2086.75,3.444189,znachimo
2,158.0,0.965,0.965,2,4301.142857,6.800161,znachimo
3,158.0,0.982,0.982,4,2800.518519,3.911342,znachimo
4,158.0,0.053,0.035,4,2.872932,3.911342,ne znachimo
5,158.0,0.026,0.02,2,4.164271,6.800161,ne znachimo


## 1.3 sleep equation #2

In [67]:
some_string3 = '''Observations            706         706         706         706    
R2                     0.118       0.022       0.015       0.007   
Adjusted_R2            0.110       0.014       0.012       0.002'''

In [68]:
# Parse the pasted summary table: the first whitespace-separated token of each
# row becomes the index label, the remaining tokens are the per-model values.
# The separator is a raw string so that `\s` is a regex class instead of an
# invalid Python string escape.
df = pd.read_csv(io.StringIO(some_string3), sep=r'\s+', header=None, index_col=0)
df

Unnamed: 0_level_0,1,2,3,4
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Observations,706.0,706.0,706.0,706.0
R2,0.118,0.022,0.015,0.007
Adjusted_R2,0.11,0.014,0.012,0.002


In [69]:
# Flip the table so that each model is a row and each statistic is a column.
# Re-running this cell on its own flips the table back, so run it once after parsing.
df = df.T
df

Unnamed: 0,Observations,R2,Adjusted_R2
1,706.0,0.118,0.11
2,706.0,0.022,0.014
3,706.0,0.015,0.012
4,706.0,0.007,0.002


In [70]:
# Per-model value of m, used by the F statistic below as (n - m) / (m - 1);
# presumably the number of estimated coefficients including the intercept.
df = df.assign(m=[8, 7, 3, 5])
df

Unnamed: 0,Observations,R2,Adjusted_R2,m
1,706.0,0.118,0.11,8
2,706.0,0.022,0.014,7
3,706.0,0.015,0.012,3
4,706.0,0.007,0.002,5


In [71]:
# Observed F statistic of overall significance, row by row:
# R^2 / (1 - R^2) * (n - m) / (m - 1).
df['F_nabl'] = df['R2'].div(1 - df['R2']).mul(
    (df['Observations'] - df['m']).div(df['m'] - 1)
)
df.round(2)

Unnamed: 0,Observations,R2,Adjusted_R2,m,F_nabl
1,706.0,0.12,0.11,8,13.34
2,706.0,0.02,0.01,7,2.62
3,706.0,0.02,0.01,3,5.35
4,706.0,0.01,0.0,5,1.24


In [72]:
alpha = 0.01
# Critical value: the (1 - alpha) quantile of F(m - 1, n - m) for every model.
df['F_crit'] = stats.f.ppf(
    1 - alpha,
    dfn=df['m'] - 1,
    dfd=df['Observations'] - df['m'],
)
df.round(2)

Unnamed: 0,Observations,R2,Adjusted_R2,m,F_nabl,F_crit
1,706.0,0.12,0.11,8,13.34,2.66
2,706.0,0.02,0.01,7,2.62,2.83
3,706.0,0.02,0.01,3,5.35,4.64
4,706.0,0.01,0.0,5,1.24,3.35


In [73]:
# Label each model: significant when the observed F exceeds the critical F.
df['znachimost'] = (df['F_nabl'] > df['F_crit']).map(
    {True: 'znachimo', False: 'ne znachimo'}
)
df

Unnamed: 0,Observations,R2,Adjusted_R2,m,F_nabl,F_crit,znachimost
1,706.0,0.118,0.11,8,13.34046,2.66493,znachimo
2,706.0,0.022,0.014,7,2.620654,2.827785,ne znachimo
3,706.0,0.015,0.012,3,5.352792,4.63547,znachimo
4,706.0,0.007,0.002,5,1.235398,3.346,ne znachimo


## 1.4 cost equation #2

In [74]:
some_string4 = '''Observations            158         158         158        158      158  
R2                     0.992       0.976       0.982      0.053    0.026 
Adjusted_R2            0.992       0.976       0.982      0.035    0.020'''

In [75]:
# Parse the pasted summary table: the first whitespace-separated token of each
# row becomes the index label, the remaining tokens are the per-model values.
# The separator is a raw string so that `\s` is a regex class instead of an
# invalid Python string escape.
df = pd.read_csv(io.StringIO(some_string4), sep=r'\s+', header=None, index_col=0)
df

Unnamed: 0_level_0,1,2,3,4,5
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Observations,158.0,158.0,158.0,158.0,158.0
R2,0.992,0.976,0.982,0.053,0.026
Adjusted_R2,0.992,0.976,0.982,0.035,0.02


In [76]:
# Flip the table so that each model is a row and each statistic is a column.
# Re-running this cell on its own flips the table back, so run it once after parsing.
df = df.T
df

Unnamed: 0,Observations,R2,Adjusted_R2
1,158.0,0.992,0.992
2,158.0,0.976,0.976
3,158.0,0.982,0.982
4,158.0,0.053,0.035
5,158.0,0.026,0.02


In [77]:
# Per-model value of m, used by the F statistic below as (n - m) / (m - 1);
# presumably the number of estimated coefficients including the intercept.
df = df.assign(m=[6, 3, 5, 4, 2])
df

Unnamed: 0,Observations,R2,Adjusted_R2,m
1,158.0,0.992,0.992,6
2,158.0,0.976,0.976,3
3,158.0,0.982,0.982,5
4,158.0,0.053,0.035,4
5,158.0,0.026,0.02,2


In [78]:
# Observed F statistic of overall significance, row by row:
# R^2 / (1 - R^2) * (n - m) / (m - 1).
df['F_nabl'] = df['R2'].div(1 - df['R2']).mul(
    (df['Observations'] - df['m']).div(df['m'] - 1)
)
df.round(2)

Unnamed: 0,Observations,R2,Adjusted_R2,m,F_nabl
1,158.0,0.99,0.99,6,3769.6
2,158.0,0.98,0.98,3,3151.67
3,158.0,0.98,0.98,5,2086.75
4,158.0,0.05,0.04,4,2.87
5,158.0,0.03,0.02,2,4.16


In [79]:
alpha = 0.01
# Critical value: the (1 - alpha) quantile of F(m - 1, n - m) for every model.
df['F_crit'] = stats.f.ppf(
    1 - alpha,
    dfn=df['m'] - 1,
    dfd=df['Observations'] - df['m'],
)
df.round(2)

Unnamed: 0,Observations,R2,Adjusted_R2,m,F_nabl,F_crit
1,158.0,0.99,0.99,6,3769.6,3.14
2,158.0,0.98,0.98,3,3151.67,4.74
3,158.0,0.98,0.98,5,2086.75,3.44
4,158.0,0.05,0.04,4,2.87,3.91
5,158.0,0.03,0.02,2,4.16,6.8


In [80]:
# Label each model: significant when the observed F exceeds the critical F.
df['znachimost'] = (df['F_nabl'] > df['F_crit']).map(
    {True: 'znachimo', False: 'ne znachimo'}
)
df

Unnamed: 0,Observations,R2,Adjusted_R2,m,F_nabl,F_crit,znachimost
1,158.0,0.992,0.992,6,3769.6,3.139909,znachimo
2,158.0,0.976,0.976,3,3151.666667,4.744744,znachimo
3,158.0,0.982,0.982,5,2086.75,3.444189,znachimo
4,158.0,0.053,0.035,4,2.872932,3.911342,ne znachimo
5,158.0,0.026,0.02,2,4.164271,6.800161,ne znachimo


# 2 F-тест: совместная значимость

$$
H_0: \beta_{1} = \beta_{2} = \dots = \beta_q = 0
$$

$$
H_1: \beta_{1}^2 + \beta_{2}^2 + \dots + \beta_q^2 > 0
$$

$$
F_{nabl} = \frac{R_{ur}^2-R_r^2}{1-R_{ur}^2} * \frac{n-m}{q}
$$

## 2.1 sleep equation #1

In [81]:
some_string5 = '''Names   Estimate  Std.Error t_nabl   
(Intercept) 3446.830301   81.839915 42.1167  
totwrk        -0.169130    0.018074 -9.3577 
age            2.714483    1.472374  1.8436   
male          87.108150   35.173210  2.4765   
south        102.271833   41.925047  2.4394  
smsa         -54.187710   33.193402 -1.6325      
yngkid       -13.051272   50.459104 -0.2587   
marr          31.360412   42.263190  0.7420     
union         11.865664   38.185877  0.3107'''

In [82]:
# Parse the pasted coefficient table; its first line supplies the column names.
# The separator is a raw string so that `\s` is a regex class instead of an
# invalid Python string escape.
df = pd.read_csv(io.StringIO(some_string5), sep=r'\s+')
df.round(3)

Unnamed: 0,Names,Estimate,Std.Error,t_nabl
0,(Intercept),3446.83,81.84,42.117
1,totwrk,-0.169,0.018,-9.358
2,age,2.714,1.472,1.844
3,male,87.108,35.173,2.476
4,south,102.272,41.925,2.439
5,smsa,-54.188,33.193,-1.632
6,yngkid,-13.051,50.459,-0.259
7,marr,31.36,42.263,0.742
8,union,11.866,38.186,0.311


In [83]:
alpha = 0.05
# Two-sided critical value: the (1 - alpha/2) quantile of Student's t
# with 706 - 8 - 1 degrees of freedom.
t_crit = stats.t.ppf(1 - alpha / 2, df=706 - 8 - 1)
t_crit.round(3)

1.963

In [84]:
# Two-sided t test per coefficient: significant when |t| exceeds the critical value.
df['znachimost'] = df['t_nabl'].abs().gt(t_crit).map(
    {True: 'znachimo', False: 'ne znachimo'}
)
df

Unnamed: 0,Names,Estimate,Std.Error,t_nabl,znachimost
0,(Intercept),3446.830301,81.839915,42.1167,znachimo
1,totwrk,-0.16913,0.018074,-9.3577,znachimo
2,age,2.714483,1.472374,1.8436,ne znachimo
3,male,87.10815,35.17321,2.4765,znachimo
4,south,102.271833,41.925047,2.4394,znachimo
5,smsa,-54.18771,33.193402,-1.6325,ne znachimo
6,yngkid,-13.051272,50.459104,-0.2587,ne znachimo
7,marr,31.360412,42.26319,0.742,ne znachimo
8,union,11.865664,38.185877,0.3107,ne znachimo


$$
H_0: \beta_{marr} = \beta_{union} = \beta_{yngkid} = \beta_{smsa} = 0
$$

$$
H_1: \beta_{marr}^2 + \beta_{union}^2 + \beta_{yngkid}^2 + \beta_{smsa}^2 > 0
$$

In [38]:
# Extract the two R^2 values from the pasted summary line: the first number is
# assigned to the unrestricted model, the second to the restricted one.
# The decimal point is escaped so it matches only a literal '.', and the values
# are converted with np.array(..., dtype=float) because the np.float_ alias was
# removed in NumPy 2.0.
unrest, rest = np.array(
    re.findall(r'[-+]?\d+\.\d+', 'R2                      0.131          0.127    '),
    dtype=float,
)
print(unrest, rest)

0.131 0.127


In [41]:
# Observed F of the joint test: (R2_ur - R2_r) / (1 - R2_ur) * (n - m) / q,
# with n = 706, m = 9 coefficients in the unrestricted model and q = 4 restrictions.
n_obs, n_coef, n_restr = 706, 9, 4
F_nabl = (unrest - rest) / (1 - unrest) * (n_obs - n_coef) / n_restr
F_nabl.round(3)

0.802

In [42]:
alpha = 0.05
# Critical value: the (1 - alpha) quantile of F with 4 and 706 - 9 degrees of freedom.
F_crit = stats.f.ppf(1 - alpha, dfn=4, dfd=706 - 9)
F_crit.round(3)

2.385

$F_{nabl} < F_{crit}$, следовательно, данные согласуются с гипотезой $H_0$ $\Rightarrow$ коэффициенты совместно незначимы

## 2.2 sleep equation #2

In [151]:
some_string6 = '''Names Estimate  Std.Error t_nabl  
(Intercept) 3613.994663  218.432669 16.5451 
totwrk        -0.167320    0.018132 -9.2278 
age           -6.254665   11.191179 -0.5589    
I(age^2)       0.108862    0.133736  0.8140  
male          90.456708   34.257144  2.6405 
south        114.547494   40.637198  2.8188'''

In [152]:
# Parse the pasted coefficient table; its first line supplies the column names.
# The separator is a raw string so that `\s` is a regex class instead of an
# invalid Python string escape.
df = pd.read_csv(io.StringIO(some_string6), sep=r'\s+')
df.round(3)

Unnamed: 0,Names,Estimate,Std.Error,t_nabl
0,(Intercept),3613.995,218.433,16.545
1,totwrk,-0.167,0.018,-9.228
2,age,-6.255,11.191,-0.559
3,I(age^2),0.109,0.134,0.814
4,male,90.457,34.257,2.64
5,south,114.547,40.637,2.819


In [153]:
alpha = 0.01
# Two-sided critical value: the (1 - alpha/2) quantile of Student's t
# with 706 - 5 - 1 degrees of freedom.
t_crit = stats.t.ppf(1 - alpha / 2, df=706 - 5 - 1)
t_crit.round(3)

2.583

In [154]:
# Two-sided t test per coefficient: significant when |t| exceeds the critical value.
df['znachimost'] = df['t_nabl'].abs().gt(t_crit).map(
    {True: 'znachimo', False: 'ne znachimo'}
)
df

Unnamed: 0,Names,Estimate,Std.Error,t_nabl,znachimost
0,(Intercept),3613.994663,218.432669,16.5451,znachimo
1,totwrk,-0.16732,0.018132,-9.2278,znachimo
2,age,-6.254665,11.191179,-0.5589,ne znachimo
3,I(age^2),0.108862,0.133736,0.814,ne znachimo
4,male,90.456708,34.257144,2.6405,znachimo
5,south,114.547494,40.637198,2.8188,znachimo


$$
H_0: \beta_{age} = \beta_{age^2} = 0
$$

$$
H_1: \beta_{age}^2 + \beta_{age^2}^2 > 0
$$

In [108]:
# Extract the two R^2 values from the pasted summary line: the first number is
# assigned to the unrestricted model, the second to the restricted one.
# The decimal point is escaped so it matches only a literal '.', and the values
# are converted with np.array(..., dtype=float) because the np.float_ alias was
# removed in NumPy 2.0.
unrest, rest = np.array(
    re.findall(r'[-+]?\d+\.\d+', 'R2                      0.128          0.122'),
    dtype=float,
)
print(unrest, rest)

0.128 0.122


In [109]:
# Observed F of the joint test: (R2_ur - R2_r) / (1 - R2_ur) * (n - m) / q,
# with n = 706, m = 6 coefficients in the unrestricted model and q = 2 restrictions.
n_obs, n_coef, n_restr = 706, 6, 2
F_nabl = (unrest - rest) / (1 - unrest) * (n_obs - n_coef) / n_restr
F_nabl.round(3)

2.408

In [110]:
alpha = 0.01
# Critical value: the (1 - alpha) quantile of F with 2 and 706 - 6 degrees of freedom.
F_crit = stats.f.ppf(1 - alpha, dfn=2, dfd=706 - 6)
F_crit.round(3)

4.636

$F_{nabl} < F_{crit}$, следовательно, данные согласуются с гипотезой $H_0$ $\Rightarrow$ коэффициенты совместно незначимы

## 2.3 sleep equation #3

In [155]:
some_string7 = '''Names  Estimate  Std.Error t_nabl 
(Intercept) 3587.178292  220.238404 16.2877 
totwrk        -0.149121    0.026286 -5.6730 
age           -6.258663   11.191863 -0.5592   
I(age^2)       0.106517    0.133767  0.7963  
male         161.890143   82.177481  1.9700  
south        112.143584   40.717344  2.7542 
totwrk:male   -0.034694    0.036279 -0.9563'''

In [156]:
# Parse the pasted coefficient table; its first line supplies the column names.
# The separator is a raw string so that `\s` is a regex class instead of an
# invalid Python string escape.
df = pd.read_csv(io.StringIO(some_string7), sep=r'\s+')
df.round(3)

Unnamed: 0,Names,Estimate,Std.Error,t_nabl
0,(Intercept),3587.178,220.238,16.288
1,totwrk,-0.149,0.026,-5.673
2,age,-6.259,11.192,-0.559
3,I(age^2),0.107,0.134,0.796
4,male,161.89,82.177,1.97
5,south,112.144,40.717,2.754
6,totwrk:male,-0.035,0.036,-0.956


In [157]:
alpha = 0.01
# Two-sided critical value: the (1 - alpha/2) quantile of Student's t
# with 706 - 6 - 1 degrees of freedom.
t_crit = stats.t.ppf(1 - alpha / 2, df=706 - 6 - 1)
t_crit.round(3)

2.583

In [158]:
# Two-sided t test per coefficient: significant when |t| exceeds the critical value.
df['znachimost'] = df['t_nabl'].abs().gt(t_crit).map(
    {True: 'znachimo', False: 'ne znachimo'}
)
df

Unnamed: 0,Names,Estimate,Std.Error,t_nabl,znachimost
0,(Intercept),3587.178292,220.238404,16.2877,znachimo
1,totwrk,-0.149121,0.026286,-5.673,znachimo
2,age,-6.258663,11.191863,-0.5592,ne znachimo
3,I(age^2),0.106517,0.133767,0.7963,ne znachimo
4,male,161.890143,82.177481,1.97,ne znachimo
5,south,112.143584,40.717344,2.7542,znachimo
6,totwrk:male,-0.034694,0.036279,-0.9563,ne znachimo


$$
H_0: \beta_{male} = \beta_{male*totwrk} = 0
$$

$$
H_1: \beta_{male}^2 + \beta_{male*totwrk}^2 > 0
$$

In [117]:
# Extract the two R^2 values from the pasted summary line: the first number is
# assigned to the unrestricted model, the second to the restricted one.
# The decimal point is escaped so it matches only a literal '.', and the values
# are converted with np.array(..., dtype=float) because the np.float_ alias was
# removed in NumPy 2.0.
unrest, rest = np.array(
    re.findall(r'[-+]?\d+\.\d+', 'R2                      0.129          0.119'),
    dtype=float,
)
print(unrest, rest)

0.129 0.119


In [118]:
# Observed F of the joint test: (R2_ur - R2_r) / (1 - R2_ur) * (n - m) / q,
# with n = 706, m = 7 coefficients in the unrestricted model and q = 2 restrictions.
n_obs, n_coef, n_restr = 706, 7, 2
F_nabl = (unrest - rest) / (1 - unrest) * (n_obs - n_coef) / n_restr
F_nabl.round(3)

4.013

In [119]:
alpha = 0.01
# Critical value: the (1 - alpha) quantile of F with 2 and 706 - 7 degrees of freedom.
F_crit = stats.f.ppf(1 - alpha, dfn=2, dfd=706 - 7)
F_crit.round(3)

4.636

$F_{nabl} < F_{crit}$, следовательно, данные согласуются с гипотезой $H_0$ $\Rightarrow$ коэффициенты совместно незначимы

## 2.4 wage equation #1

In [159]:
some_string8 = '''Names Estimate  Std.Error t_nabl   
(Intercept)  2.93780233  1.65623416  1.7738   
age          0.14475408  0.10003338  1.4471    
I(age^2)    -0.00185652  0.00150253 -1.2356    
IQ           0.00821604  0.00084095  9.7699 
south       -0.10027331  0.02683810 -3.7362 
urban        0.17319472  0.02766716  6.2599 
married      0.20153862  0.04023976  5.0084'''

In [160]:
# Parse the pasted coefficient table; its first line supplies the column names.
# The separator is a raw string so that `\s` is a regex class instead of an
# invalid Python string escape.
df = pd.read_csv(io.StringIO(some_string8), sep=r'\s+')
df.round(3)

Unnamed: 0,Names,Estimate,Std.Error,t_nabl
0,(Intercept),2.938,1.656,1.774
1,age,0.145,0.1,1.447
2,I(age^2),-0.002,0.002,-1.236
3,IQ,0.008,0.001,9.77
4,south,-0.1,0.027,-3.736
5,urban,0.173,0.028,6.26
6,married,0.202,0.04,5.008


In [161]:
alpha = 0.01
# Two-sided critical value: the (1 - alpha/2) quantile of Student's t
# with 935 - 6 - 1 degrees of freedom.
t_crit = stats.t.ppf(1 - alpha / 2, df=935 - 6 - 1)
t_crit.round(3)

2.581

In [162]:
# Two-sided t test per coefficient: significant when |t| exceeds the critical value.
df['znachimost'] = df['t_nabl'].abs().gt(t_crit).map(
    {True: 'znachimo', False: 'ne znachimo'}
)
df

Unnamed: 0,Names,Estimate,Std.Error,t_nabl,znachimost
0,(Intercept),2.937802,1.656234,1.7738,ne znachimo
1,age,0.144754,0.100033,1.4471,ne znachimo
2,I(age^2),-0.001857,0.001503,-1.2356,ne znachimo
3,IQ,0.008216,0.000841,9.7699,znachimo
4,south,-0.100273,0.026838,-3.7362,znachimo
5,urban,0.173195,0.027667,6.2599,znachimo
6,married,0.201539,0.04024,5.0084,znachimo


$$
H_0: \beta_{age} = \beta_{age^2} = 0
$$

$$
H_1: \beta_{age}^2 + \beta_{age^2}^2 > 0
$$

In [124]:
# Extract the two R^2 values from the pasted summary line: the first number is
# assigned to the unrestricted model, the second to the restricted one.
# The decimal point is escaped so it matches only a literal '.', and the values
# are converted with np.array(..., dtype=float) because the np.float_ alias was
# removed in NumPy 2.0.
unrest, rest = np.array(
    re.findall(r'[-+]?\d+\.\d+', 'R2                      0.201          0.175'),
    dtype=float,
)
print(unrest, rest)

0.201 0.175


In [125]:
# Observed F of the joint test: (R2_ur - R2_r) / (1 - R2_ur) * (n - m) / q,
# with n = 935, m = 7 coefficients in the unrestricted model and q = 2 restrictions.
n_obs, n_coef, n_restr = 935, 7, 2
F_nabl = (unrest - rest) / (1 - unrest) * (n_obs - n_coef) / n_restr
F_nabl.round(3)

15.099

In [126]:
alpha = 0.01
# Critical value: the (1 - alpha) quantile of F with 2 and 935 - 7 degrees of freedom.
F_crit = stats.f.ppf(1 - alpha, dfn=2, dfd=935 - 7)
F_crit.round(3)

4.628

$F_{nabl} > F_{crit}$, следовательно, данные не согласуются с гипотезой $H_0$ $\Rightarrow$ коэффициенты совместно значимы

# 3 F-тест: структурные ограничения

$$
F_{crit} (\alpha, df_{num}, df_{denom})
$$

## 3.1 cost equation

In [29]:
# Read Electricity.csv from the artamonoff/Econometrica GitHub repository.
electricity_url = 'https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/List05/Electricity.csv'
df = pd.read_csv(electricity_url)
df

Unnamed: 0,cost,q,pl,sl,pk,sk,pf,sf
0,0.2130,8.0,6869.47,0.3291,64.945,0.4197,18.000,0.2512
1,3.0427,869.0,8372.96,0.1030,68.227,0.2913,21.067,0.6057
2,9.4059,1412.0,7960.90,0.0891,40.692,0.1567,41.530,0.7542
3,0.7606,65.0,8971.89,0.2802,41.243,0.1282,28.539,0.5916
4,2.2587,295.0,8218.40,0.1772,71.940,0.1623,39.200,0.6606
...,...,...,...,...,...,...,...,...
153,6.8293,946.6,10642.16,0.0883,43.600,0.1914,51.463,0.7203
154,3.7605,377.0,7432.24,0.2117,74.120,0.2274,33.436,0.5609
155,3.9822,391.0,5826.04,0.1926,78.288,0.0924,44.633,0.7151
156,30.1880,5317.0,9586.63,0.0845,78.008,0.2009,41.840,0.7147


In [30]:
# OLS of log(cost) on log(q), squared log(q), log(pl), log(pk) and log(pf).
cost_formula = 'np.log(cost)~np.log(q)+I(np.log(q)**2)+np.log(pl)+np.log(pk)+np.log(pf)'
models = smf.ols(formula=cost_formula, data=df).fit()
models.summary()

0,1,2,3
Dep. Variable:,np.log(cost),R-squared:,0.992
Model:,OLS,Adj. R-squared:,0.992
Method:,Least Squares,F-statistic:,3880.0
Date:,"Sun, 02 Apr 2023",Prob (F-statistic):,2.45e-158
Time:,22:56:44,Log-Likelihood:,91.515
No. Observations:,158,AIC:,-171.0
Df Residuals:,152,BIC:,-152.7
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-6.7387,0.706,-9.541,0.000,-8.134,-5.343
np.log(q),0.4030,0.032,12.734,0.000,0.340,0.466
I(np.log(q) ** 2),0.0304,0.002,14.024,0.000,0.026,0.035
np.log(pl),0.1461,0.070,2.073,0.040,0.007,0.285
np.log(pk),0.1571,0.058,2.721,0.007,0.043,0.271
np.log(pf),0.6847,0.043,16.043,0.000,0.600,0.769

0,1,2,3
Omnibus:,2.342,Durbin-Watson:,1.798
Prob(Omnibus):,0.31,Jarque-Bera (JB):,2.078
Skew:,0.109,Prob(JB):,0.354
Kurtosis:,3.518,Cond. No.,4990.0


In [31]:
# Coefficient table only, rebuilt with alpha = 0.01 so the interval columns are [0.005, 0.995].
coef_table = models.summary(alpha=0.01).tables[1]
coef_table

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.005,0.995]
Intercept,-6.7387,0.706,-9.541,0.000,-8.581,-4.896
np.log(q),0.4030,0.032,12.734,0.000,0.320,0.486
I(np.log(q) ** 2),0.0304,0.002,14.024,0.000,0.025,0.036
np.log(pl),0.1461,0.070,2.073,0.040,-0.038,0.330
np.log(pk),0.1571,0.058,2.721,0.007,0.007,0.308
np.log(pf),0.6847,0.043,16.043,0.000,0.573,0.796


In [32]:
some_string9 = '''names coef std.err
Intercept -6.7387 0.706
np.log(q) 0.4030 0.032
I(np.log(q)**2) 0.0304 0.002
np.log(pl) 0.1461 0.070
np.log(pk) 0.1571 0.058
np.log(pf) 0.6847 0.043'''

In [33]:
# Parse the pasted coefficient table; its first line supplies the column names.
# The separator is a raw string so that `\s` is a regex class instead of an
# invalid Python string escape.
# Note: this rebinds `df`, which previously held the Electricity data.
df = pd.read_csv(io.StringIO(some_string9), sep=r'\s+')
df

Unnamed: 0,names,coef,std.err
0,Intercept,-6.7387,0.706
1,np.log(q),0.403,0.032
2,I(np.log(q)**2),0.0304,0.002
3,np.log(pl),0.1461,0.07
4,np.log(pk),0.1571,0.058
5,np.log(pf),0.6847,0.043


In [34]:
# Recompute the t statistics from the rounded coefficients and standard errors
# typed in above, so they differ slightly from the t column of the model summary.
df['t_nabl'] = df['coef'].div(df['std.err'])
df.round(3)

Unnamed: 0,names,coef,std.err,t_nabl
0,Intercept,-6.739,0.706,-9.545
1,np.log(q),0.403,0.032,12.594
2,I(np.log(q)**2),0.03,0.002,15.2
3,np.log(pl),0.146,0.07,2.087
4,np.log(pk),0.157,0.058,2.709
5,np.log(pf),0.685,0.043,15.923


In [35]:
alpha = 0.01
# The significance check below compares |t| with this value, i.e. it is a
# two-sided test, so the critical value is the (1 - alpha/2) quantile, as in the
# other t tests of this notebook. The fitted model has 5 regressors plus an
# intercept, giving 158 - 5 - 1 = 152 residual degrees of freedom
# (the model summary reports Df Residuals: 152).
t_crit = stats.t.ppf(1-alpha/2, 158-5-1)
t_crit.round(3)

2.608

In [36]:
# Two-sided t test per coefficient: significant when |t| exceeds the critical value.
df['znachimost'] = df['t_nabl'].abs().gt(t_crit).map(
    {True: 'znachim', False: 'ne znachim'}
)
df

Unnamed: 0,names,coef,std.err,t_nabl,znachimost
0,Intercept,-6.7387,0.706,-9.544901,znachim
1,np.log(q),0.403,0.032,12.59375,znachim
2,I(np.log(q)**2),0.0304,0.002,15.2,znachim
3,np.log(pl),0.1461,0.07,2.087143,ne znachim
4,np.log(pk),0.1571,0.058,2.708621,znachim
5,np.log(pf),0.6847,0.043,15.923256,znachim


### 3.1.1 Гипотеза 1

$$
H_0: \beta_{pf} + \beta_{pl} + \beta_{pk} = 1
$$

In [45]:
# F test of the linear restriction b_pl + b_pk + b_pf = 1 on the cost equation.
hypothesis = 'np.log(pl)+np.log(pk)+np.log(pf) = 1'
models.f_test(hypothesis)

<class 'statsmodels.stats.contrast.ContrastResults'>
<F test: F=0.014541184876261685, p=0.9041775484872097, df_denom=152, df_num=1>

In [71]:
alpha = 0.01
df_denom = 152
df_num = 1
# This is a critical value of the F distribution, so it is stored as F_crit
# (not t_crit), matching the F_nabl vs F_crit comparison in the conclusion.
F_crit = stats.f.ppf(1 - alpha, df_num, df_denom)
F_crit.round(3)

6.805

Вывод: гипотезу не отвергаем, т.к. $ F_{nabl} < F_{crit} $

### 3.1.2 Гипотеза 2

$$
H_0: \beta_{pl} = \beta_{pk}
$$

In [73]:
# F test of the equality restriction b_pl = b_pk on the cost equation.
hypothesis = 'np.log(pl) = np.log(pk)'
models.f_test(hypothesis)

<class 'statsmodels.stats.contrast.ContrastResults'>
<F test: F=0.020086275898515187, p=0.8874840609151591, df_denom=152, df_num=1>

In [74]:
alpha = 0.01
df_denom = 152
df_num = 1
# This is a critical value of the F distribution, so it is stored as F_crit
# (not t_crit), matching the F_nabl vs F_crit comparison in the conclusion.
F_crit = stats.f.ppf(1 - alpha, df_num, df_denom)
F_crit.round(3)

6.805

Вывод: гипотезу не отвергаем, т.к. $ F_{nabl} < F_{crit} $

### 3.1.3 Гипотеза 3

$$
H_0: \beta_{pf} = \beta_{pl} = \beta_{pk}
$$

In [75]:
# Joint F test of the two equality restrictions b_pf = b_pl = b_pk on the cost equation.
hypothesis = 'np.log(pf)=np.log(pl)=np.log(pk)'
models.f_test(hypothesis)

<class 'statsmodels.stats.contrast.ContrastResults'>
<F test: F=26.41708068625207, p=1.424069501505643e-10, df_denom=152, df_num=2>

In [76]:
alpha = 0.01
df_denom = 152
df_num = 2
# This is a critical value of the F distribution, so it is stored as F_crit
# (not t_crit), matching the F_nabl vs F_crit comparison in the conclusion.
F_crit = stats.f.ppf(1 - alpha, df_num, df_denom)
F_crit.round(3)

4.748

Вывод: гипотезу отвергаем, т.к. $ F_{nabl} > F_{crit} $

## 3.2 output equation

In [117]:
# Read Labour.csv from the artamonoff/Econometrica GitHub repository.
labour_url = 'https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/Labour.csv'
df = pd.read_csv(labour_url)
df

Unnamed: 0,capital,labour,output,wage
0,2.606563,184,9.250759,43.080307
1,1.323237,91,3.664310,27.780016
2,22.093692,426,28.781516,44.467748
3,10.737851,72,4.124642,39.734710
4,1.161365,46,2.890150,34.650709
...,...,...,...,...
564,2.625403,20,1.424376,33.477545
565,1.276386,61,2.109048,26.300732
566,1.953869,117,6.241870,41.153979
567,1.318527,46,7.902237,66.720139


In [121]:
# OLS of log(output) on log(capital), log(labour) and log(wage).
models1 = smf.ols(data = df, formula = 'np.log(output)~np.log(capital)+np.log(labour)+np.log(wage)').fit()
# Display the model fitted in this cell (models1); `models` is the
# cost-equation fit from section 3.1 and would show the wrong regression.
models1.summary()

0,1,2,3
Dep. Variable:,np.log(output),R-squared:,0.888
Model:,OLS,Adj. R-squared:,0.888
Method:,Least Squares,F-statistic:,1499.0
Date:,"Sun, 02 Apr 2023",Prob (F-statistic):,1.65e-268
Time:,23:39:57,Log-Likelihood:,-279.62
No. Observations:,569,AIC:,567.2
Df Residuals:,565,BIC:,584.6
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-5.0073,0.221,-22.649,0.000,-5.442,-4.573
np.log(capital),0.1493,0.015,10.141,0.000,0.120,0.178
np.log(labour),0.7204,0.019,37.487,0.000,0.683,0.758
np.log(wage),0.9214,0.058,16.001,0.000,0.808,1.034

0,1,2,3
Omnibus:,252.01,Durbin-Watson:,2.008
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3723.965
Skew:,1.547,Prob(JB):,0.0
Kurtosis:,15.145,Cond. No.,82.3


In [122]:
# Coefficient table only, with alpha = 0.05 so the interval columns are [0.025, 0.975].
coef_table = models1.summary(alpha=0.05).tables[1]
coef_table

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-5.0073,0.221,-22.649,0.000,-5.442,-4.573
np.log(capital),0.1493,0.015,10.141,0.000,0.120,0.178
np.log(labour),0.7204,0.019,37.487,0.000,0.683,0.758
np.log(wage),0.9214,0.058,16.001,0.000,0.808,1.034


In [123]:
some_string10 = '''names coef std.err
Intercept -5.0073 0.221 
np.log(capital) 0.1493 0.015 
np.log(labour) 0.7204 0.019 
np.log(wage) 0.9214 0.058'''

In [106]:
# Parse the pasted coefficient table; its first line supplies the column names.
# The separator is a raw string so that `\s` is a regex class instead of an
# invalid Python string escape.
# Note: this rebinds `df`, which previously held the Labour data.
df = pd.read_csv(io.StringIO(some_string10), sep=r'\s+')
df

Unnamed: 0,names,coef,std.err
0,Intercept,-5.0073,0.221
1,np.log(capital),0.1493,0.015
2,np.log(labour),0.7204,0.019
3,np.log(wage),0.9214,0.058


In [107]:
# Recompute the t statistics from the rounded coefficients and standard errors
# typed in above, so they differ slightly from the t column of the model summary.
df['t_nabl'] = df['coef'].div(df['std.err'])
df.round(3)

Unnamed: 0,names,coef,std.err,t_nabl
0,Intercept,-5.007,0.221,-22.657
1,np.log(capital),0.149,0.015,9.953
2,np.log(labour),0.72,0.019,37.916
3,np.log(wage),0.921,0.058,15.886


In [108]:
alpha = 0.05
# Two-sided critical value: the (1 - alpha/2) quantile of Student's t
# with 569 - 3 - 1 degrees of freedom.
t_crit = stats.t.ppf(1 - alpha / 2, df=569 - 3 - 1)
t_crit.round(3)

1.964

In [109]:
# Two-sided t test per coefficient: significant when |t| exceeds the critical value.
df['znachimost'] = df['t_nabl'].abs().gt(t_crit).map(
    {True: 'znachim', False: 'ne znachim'}
)
df

Unnamed: 0,names,coef,std.err,t_nabl,znachimost
0,Intercept,-5.0073,0.221,-22.657466,znachim
1,np.log(capital),0.1493,0.015,9.953333,znachim
2,np.log(labour),0.7204,0.019,37.915789,znachim
3,np.log(wage),0.9214,0.058,15.886207,znachim


### 3.2.1 Гипотеза 1

$$
H_0: \beta_{capital} + \beta_{labour} + \beta_{wage} = 1
$$

In [124]:
# F test of the linear restriction b_capital + b_labour + b_wage = 1 on the output equation.
hypothesis = 'np.log(capital)+np.log(labour)+np.log(wage) = 1'
models1.f_test(hypothesis)

<class 'statsmodels.stats.contrast.ContrastResults'>
<F test: F=198.58693315849158, p=7.28095945466417e-39, df_denom=565, df_num=1>

In [125]:
alpha = 0.05
df_denom = 565
df_num = 1
# This is a critical value of the F distribution, so it is stored as F_crit
# (not t_crit), matching the F_nabl vs F_crit comparison in the conclusion.
F_crit = stats.f.ppf(1 - alpha, df_num, df_denom)
F_crit.round(3)

3.858

Вывод: гипотезу отвергаем, т.к. $ F_{nabl} > F_{crit} $

### 3.2.2 Гипотеза 2

$$
H_0: \beta_{labour} + \beta_{wage} = 0
$$

In [126]:
# The constraint string contains no '=', so the restriction actually tested is
# b_labour + b_wage = 0 (one restriction, df_num = 1 in the result).
# NOTE(review): by analogy with section 3.1.2 the intended hypothesis may be
# b_labour = b_wage, i.e. 'np.log(labour) = np.log(wage)' — confirm against the task.
hypothesis = 'np.log(labour)+np.log(wage)'
models1.f_test(hypothesis)

<class 'statsmodels.stats.contrast.ContrastResults'>
<F test: F=723.5613252762488, p=3.1582056293248693e-103, df_denom=565, df_num=1>

In [127]:
alpha = 0.05
df_denom = 565
df_num = 1
# This is a critical value of the F distribution, so it is stored as F_crit
# (not t_crit), matching the F_nabl vs F_crit comparison in the conclusion.
F_crit = stats.f.ppf(1 - alpha, df_num, df_denom)
F_crit.round(3)

3.858

Вывод: гипотезу отвергаем, т.к. $ F_{nabl} > F_{crit} $

### 3.2.3 Гипотеза 3

$$
H_0: \beta_{capital} + \beta_{labour} + \beta_{wage} = 0
$$

In [128]:
# The constraint string contains no '=', so the restriction actually tested is
# b_capital + b_labour + b_wage = 0 (one restriction, df_num = 1 in the result).
# NOTE(review): by analogy with section 3.1.3 the intended hypothesis may be
# b_capital = b_labour = b_wage (two restrictions) — confirm against the task;
# that would also change df_num in the critical-value cell that follows.
hypothesis = 'np.log(capital)+np.log(labour)+np.log(wage)'
models1.f_test(hypothesis)

<class 'statsmodels.stats.contrast.ContrastResults'>
<F test: F=1017.8915682267822, p=1.6978783562369782e-128, df_denom=565, df_num=1>

In [129]:
alpha = 0.05
df_denom = 565
df_num = 1
# This is a critical value of the F distribution, so it is stored as F_crit
# (not t_crit), matching the F_nabl vs F_crit comparison in the conclusion.
F_crit = stats.f.ppf(1 - alpha, df_num, df_denom)
F_crit.round(3)

3.858

Вывод: гипотезу отвергаем, т.к. $ F_{nabl} > F_{crit} $