## 1-sample t-test

In [5]:
import numpy as np
import scipy.stats as sp
from scipy import stats
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.stats.proportion import proportions_ztest

In [6]:
df = pd.DataFrame({'sample':[74.5, 81.2, 73.8, 82.0, 76.3, 75.7, 80.2, 72.6, 77.9, 82.8]})
t_result = stats.ttest_1samp(df, 76.7)
t,p = t_result.statistic.round(3), t_result.pvalue.round(3)
print("1-Sample t-test")
print("t검정통계량 : {}".format(t))
print("p-value : {}".format(p))

1-Sample t-test
t검정통계량 : [0.864]
p-value : [0.41]


In [7]:
df = pd.DataFrame({'sample':[85.0, 79.0, 79.1, 79.9, 81.6, 78.6, 85.4, 83.4, 78.1, 79.2]})
t_result = stats.ttest_1samp(df, 80.93)
t,p = t_result.statistic.round(3), t_result.pvalue.round(3)
print("1-Sample t-test")
print("t검정통계량 : {}".format(t))
print("p-value : {}".format(p))

1-Sample t-test
t검정통계량 : [0.]
p-value : [1.]


### 2-sample t-test

In [8]:
df1 = pd.DataFrame({'sample':[6,5,5,4,6,7,6,4,5,6,4,5,5,6,4,8,6,5,6,7]})
df2 = pd.DataFrame({'sample':[7,5,7,8,7,8,8,5,7,6,5,5,6,6,5,7,9,7,7,8]})

t_result = stats.ttest_ind(df1,df2)
t,p = t_result.statistic.round(3), t_result.pvalue.round(3)

print("2-Smaple t-test")
print("t검정통계량 : {}".format(t))
print("P-value : {}".format(p))

2-Smaple t-test
t검정통계량 : [-3.122]
P-value : [0.003]


In [9]:
df1 = pd.DataFrame({'A':[1.883, 1.715, 1.799, 1.768, 1.711, 1.832, 1.427, 1.344]})
df2 = pd.DataFrame({'B':[1.435, 1.572, 1.486, 1.511, 1.457, 1.548, 1.404, 1.883]})

t_result = stats.ttest_ind(df1,df2)
t, p = t_result.statistic.round(3), t_result.pvalue.round(3)

print("2-Smaple t-test")
print("t검정통계량 : {}".format(t))
print("P-value : {}".format(p))

2-Smaple t-test
t검정통계량 : [1.7]
P-value : [0.111]


### Paired t-test

In [16]:
df1 = pd.DataFrame({'A':[720, 589, 780, 648, 720, 589, 780, 648, 780, 648]})
df2 = pd.DataFrame({'B':[810, 670, 790, 712, 810, 670, 790, 712, 790, 712]})

t_result = stats.ttest_rel(df1, df2)
t,p = t_result.statistic.round(3), t_result.pvalue.round(3)

print("Paired t-test")
print("t검정통계량 : {}".format(t))
print("P-value : {}".format(p))

Paired t-test
t검정통계량 : [-5.324]
P-value : [0.]


In [10]:
count = np.array([4,1])
nobs = np.array([1000,1200])
stat, pval = proportions_ztest(count, nobs)

print("2 Proportion test")
print('p 검정통계량 : {:0.3f}'.format(stat))
print('p value : {:0.3f}'.format(pval))

2 Proportion test
p 검정통계량 : 1.553
p value : 0.120


---
### 카이스퀘어 ,chi-square test

In [11]:
df = pd.DataFrame({'HSG':[270,260,236,234], 'SS':[228,285,225,262], 'SPA':[277,284,231,208]})
chi, pval, dof, expected = stats.chi2_contingency(df.T)

print("chi-square test")
print('chisq:{:0.3f}'.format(chi))
print('p:{:0.03f}'.format(pval))
print('degree pf freedom: {}'.format(dof))
print('expected value: \n{}'.format(expected.round(3)))

chi-square test
chisq:13.366
p:0.038
degree pf freedom: 6
expected value: 
[[258.333 276.333 230.667 234.667]
 [258.333 276.333 230.667 234.667]
 [258.333 276.333 230.667 234.667]]


In [12]:
df = pd.DataFrame({'HSG':[270,260,236,234], 'SS':[228,285,225,262], 'SPA':[277,284,231,208]})
chi, pval, dof, expected = stats.chi2_contingency(df.T)

print("chi-square test")
print('chisq:{:0.3f}'.format(chi))
print('p:{:0.03f}'.format(pval))
print('degree pf freedom: {}'.format(dof))
print('expected value: \n{}'.format(expected.round(3)))

chi-square test
chisq:13.366
p:0.038
degree pf freedom: 6
expected value: 
[[258.333 276.333 230.667 234.667]
 [258.333 276.333 230.667 234.667]
 [258.333 276.333 230.667 234.667]]


### 상관분석, Correlation Analysis

In [13]:
df1 = [1,2,3,4,4,5,6,6,7,8]
df2 = [23,29,49,64,74,87,96,97,109,119]

corr, pval = sp.stats.pearsonr(df1, df2)

print("correlation Analysis")
print('crr:{:0.3f}'.format(corr))
print('p:{:0.3f}'.format(pval))

correlation Analysis
crr:0.989
p:0.000


In [17]:
minutes = [1,2,3,4,4,5,6,6,7,8]
units = [23,29,49,64,74,87,96,97,109,119]

minutes = sm.add_constant(minutes)
model = sm.OLS(units, minutes)
result = model.fit()

print(result.summary())



                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.979
Model:                            OLS   Adj. R-squared:                  0.976
Method:                 Least Squares   F-statistic:                     365.3
Date:                Fri, 26 Apr 2019   Prob (F-statistic):           5.82e-08
Time:                        13:35:17   Log-Likelihood:                -29.401
No. Observations:                  10   AIC:                             62.80
Df Residuals:                       8   BIC:                             63.41
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          7.1712      3.886      1.845      0.1

  "anyway, n=%i" % int(n))
