In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm 
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_params # вывод результатов тестирования
from statsmodels.iolib.summary2 import summary_col # вывод результатов тестирования
from statsmodels.stats.outliers_influence import variance_inflation_factor # VIF
import scipy
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:
# Load the sleep75 data and append a scaled squared-hours column in one chained step.
# `totwrk2_10000` is totwrk squared divided by 10000; the model formula in the
# visible cells builds the same term inline via I(...), so this column is a convenience copy.
df = (
    pd.read_csv('https://raw.githubusercontent.com/ryupepa/Econometrica/master/python-notebooks/data-csv/sleep75.csv')
    .assign(totwrk2_10000=lambda frame: frame['totwrk'] ** 2 / 10000)
)

In [5]:
# OLS model: sleep regressed on totwrk/100, totwrk^2/10000, age, smsa and male.
# The I(...) wrappers make the formula parser evaluate the arithmetic literally
# instead of treating `/` and `**` as formula operators.
Sleep_eq12 = smf.ols(
    formula='sleep~I(totwrk/100)+I(totwrk**2/10000)+age+smsa+male',
    data=df,
).fit()

In [6]:
# Extra rows appended below the coefficient table. Each value is a callable that
# receives a fitted results object and returns the already-formatted cell text.
# Dict order determines the row order in the printed table.
info_dict = {
    'No. Observations': lambda res: str(int(res.nobs)),
    # Residual standard error = square root of the residual mean square.
    'Residual Std. Error': lambda res: "%#8.3f" % (res.mse_resid ** 0.5),
    'F-statistic': lambda res: '{:.3f}'.format(res.fvalue),
    'Prob (F-statistic)': lambda res: '{:.3f}'.format(res.f_pvalue),
}

# Render the regression table with three-decimal coefficients and significance stars.
print(
    summary_col(
        Sleep_eq12,
        float_format='%.3f',
        stars=True,
        info_dict=info_dict,
    )
)


                          sleep   
----------------------------------
Intercept              3428.805***
                       (76.655)   
I(totwrk / 100)        -8.195*    
                       (4.789)    
I(totwrk ** 2 / 10000) -0.214*    
                       (0.111)    
age                    2.817**    
                       (1.387)    
smsa                   -78.049**  
                       (32.072)   
male                   86.639**   
                       (34.200)   
R-squared              0.128      
R-squared Adj.         0.122      
No. Observations       706        
Residual Std. Error    416.457    
F-statistic            20.566     
Prob (F-statistic)     0.000      
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01


In [7]:
# Coefficient table for Sleep_eq12 (estimates, std. errors, t, p-values and the
# confidence bounds produced with alpha=0.05).
# NOTE(review): this rebinds `df`, which previously held the sleep75 data, to the
# coefficient table -- re-run the data-loading cell before refitting any model on `df`.
df = summary_params(Sleep_eq12, alpha=0.05)

# Label every coefficient by comparing its p-value with the 0.05 threshold.
df['significance'] = df['P>|t|'].map(
    lambda p_value: 'Значим' if p_value < 0.05 else 'Незначим'
)
df

Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975],significance
Intercept,3428.804849,76.655048,44.73032,2.023213e-207,3278.303493,3579.306204,Значим
I(totwrk / 100),-8.195316,4.789446,-1.71112,0.08750201,-17.598716,1.208085,Незначим
I(totwrk ** 2 / 10000),-0.214156,0.110951,-1.930194,0.05398646,-0.431992,0.00368,Незначим
age,2.816697,1.386926,2.030892,0.0426434,0.093663,5.539731,Значим
smsa,-78.04946,32.072351,-2.433543,0.01520105,-141.018991,-15.07993,Значим
male,86.638759,34.199952,2.533301,0.01151657,19.491986,153.785532,Значим
