In [1]:
import numpy as np
import pandas as pd
from scipy import stats
from scipy.stats.distributions import _continuous_distns as cd
from statsmodels.formula.api import ols
from statsmodels.stats import anova

# 单样本双边t检验

In [2]:
# One-sample, two-sided t-test: is the mean of `a` equal to mu0?
def mean_test_conclusion(p_value, mu0, alpha=0.05):
    """Return the verdict text for the two-sided one-sample t-test on ``a``.

    Parameters
    ----------
    p_value : float
        Two-sided p-value of the test.
    mu0 : number
        Hypothesised population mean (the value under H0).
    alpha : float, optional
        Significance level; H0 is kept when ``p_value > alpha``.

    Returns
    -------
    str
        The H0 message when ``p_value > alpha``, otherwise the H1 message.
        Both messages are built from ``mu0`` so they always name the value
        that was actually tested.
    """
    if p_value > alpha:
        return f'接受原假设H0：a的均值为{mu0}'
    return f'接受备择假设H1：a的均值不为{mu0}'


mu0 = 10      # hypothesised mean under H0
alpha = 0.05  # significance level
a = np.array([9.8, 10.4, 10.6, 9.6, 9.7, 9.9, 10.9, 11.1, 9.6, 10.2,
              10.3, 9.6, 9.9, 11.2, 10.6, 9.8, 10.5, 10.1, 10.5, 9.7])
t, p_ = stats.ttest_1samp(a, mu0)  # t statistic and two-sided p-value
conclusion = mean_test_conclusion(p_, mu0, alpha)
print(conclusion)


接受原假设H0：a的均值为10


# 两个独立正态分布总体均值的两样本精确t检验

In [3]:
# Two-sample t-test (equal variances assumed) on two independent normal samples.
# A single seeded generator is shared by both draws so the cell gives the same
# result on every run while `a` and `b` remain independent of each other.
rng = np.random.RandomState(42)
alpha = 0.05  # significance level
# `stats.norm` is the public name of the distribution object reached via `cd.norm`.
a = stats.norm.rvs(loc=2, scale=2, size=200, random_state=rng)
b = stats.norm.rvs(loc=2.31, scale=2, size=200, random_state=rng)
t, p_ = stats.ttest_ind(a, b, equal_var=True)  # t statistic and two-sided p-value
print('接受原假设H0：a和b的均值无显著差别' if p_ > alpha else '接受备择假设H1：a和b的均值有显著差别')


接受原假设H0：a和b的均值无显著差别


# 多个总体的方差齐性检验（Levene 检验）

In [4]:
# Levene's test for equality of variances across three normal samples.
# One seeded generator feeds all three draws so the cell is reproducible.
rng = np.random.RandomState(42)
alpha = 0.05  # significance level
# `stats.norm` is the public name of the distribution object reached via `cd.norm`.
a = stats.norm.rvs(loc=5, scale=2, size=100, random_state=rng)
b = stats.norm.rvs(loc=12, scale=3, size=90, random_state=rng)
c = stats.norm.rvs(loc=52, scale=3.1, size=90, random_state=rng)
# `t` holds the Levene test statistic here (the name is kept for compatibility).
t, p_ = stats.levene(a, b, c)
print(f'在显著性水平α={alpha}的情况下a、b、c的方差{"不" if p_ > alpha else ""}存在显著差异')


在显著性水平α=0.05的情况下a、b、c的方差存在显著差异


In [5]:
# One-way ANOVA: do the three samples share a common mean?
# One seeded generator feeds all three draws so the cell is reproducible.
rng = np.random.RandomState(42)
alpha = 0.05  # significance level
# `stats.norm` is the public name of the distribution object reached via `cd.norm`.
a = stats.norm.rvs(loc=2, scale=2, size=200, random_state=rng)
b = stats.norm.rvs(loc=2.31, scale=2, size=200, random_state=rng)
c = stats.norm.rvs(loc=2.05, scale=2, size=100, random_state=rng)
# Note: a and b are drawn from populations with different means (2 vs 2.31).
F, p_ = stats.f_oneway(a, b, c)  # F statistic and p-value
print(f'在显著性水平α={alpha}的情况下a、b、c的均值{"不" if p_ > alpha else ""}存在显著差异')


在显著性水平α=0.05的情况下a、b、c的均值存在显著差异


In [6]:
# ANOVA table for the linear model Y ~ X1 + X2 + X1:X2 on a balanced 2x2 layout.
# Rows are ordered (X1=1, X2=1), (X1=1, X2=2), (X1=2, X2=1), (X1=2, X2=2),
# with 10 observations in each combination.
X1 = [1] * 20 + [2] * 20
X2 = ([1] * 10 + [2] * 10) * 2
Y = [
    76, 78, 76, 76, 76, 74, 74, 76, 76, 55,   # X1=1, X2=1
    65, 90, 65, 90, 65, 90, 90, 79, 70, 90,   # X1=1, X2=2
    88, 76, 76, 76, 56, 76, 76, 98, 88, 78,   # X1=2, X2=1
    65, 67, 67, 87, 78, 56, 54, 56, 54, 56,   # X1=2, X2=2
]
data = pd.DataFrame({'X1': X1, 'X2': X2, 'Y': Y})
# Fit the linear regression with both main effects and their interaction.
model_result = ols('Y~X1+X2+X1:X2', data).fit()
# Build the ANOVA table and expose the p-value column under the name `p_`.
anova_res = anova.anova_lm(model_result).rename(columns={'PR(>F)': 'p_'})
print(anova_res)



            df    sum_sq      mean_sq         F        p_
X1         1.0   265.225   265.225000  2.444407  0.126693
X2         1.0   207.025   207.025000  1.908016  0.175698
X1:X2      1.0  1050.625  1050.625000  9.682932  0.003631
Residual  36.0  3906.100   108.502778       NaN       NaN
