# 独立样本t检验

- **做多个独立样本t检验，并将结果保存到一张表格中**

In [60]:
import pandas as pd
import scipy.stats as stats

# Sample data: two grouping columns (group1, group2) and two dependent
# variables (value1, value2).
data = {
    'group1': ['A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B'],
    'group2': ['D', 'D', 'D', 'D', 'D', 'C', 'C', 'C', 'C'],
    'value1': [23, 21, 18, 25, 27, 29, 30, 28, 26],
    'value2': [123, 121, 118, 125, 127, 129, 130, 128, 126],
}

df = pd.DataFrame(data)

# Label pairs to compare, the grouping columns to look them up in, and the
# dependent variables to test.
comparisons = [('A', 'B'), ('C', 'D')]
columns_cmp = ['group1', 'group2']
columns_dep = ['value1', 'value2']

result_columns = ['Comparison', 'mean_value_1', 'mean_value_2', 'T-statistic', 'P-value']

# Collect one record per (comparison, grouping column, dependent variable)
# and build the DataFrame once at the end. Growing a DataFrame with
# pd.concat inside the loop is quadratic, and concatenating onto an empty
# frame triggers a pandas FutureWarning about empty/all-NA entries.
records = []
for comparison in comparisons:
    for column in columns_cmp:
        for column_dep in columns_dep:
            values_1 = df.loc[df[column] == comparison[0], column_dep]
            values_2 = df.loc[df[column] == comparison[1], column_dep]

            # Means of an empty selection are NaN, which marks the row for
            # removal by dropna() below.
            mean_value_1 = values_1.mean()
            mean_value_2 = values_2.mean()

            if values_1.empty or values_2.empty:
                # The labels do not occur in this grouping column (e.g.
                # 'A' vs 'B' looked up in group2). Skip the test instead of
                # running it on empty samples, which only yields NaN plus
                # runtime warnings.
                t_statistic, p_value = float('nan'), float('nan')
            else:
                # Independent two-sample t-test (scipy default: equal variances).
                t_statistic, p_value = stats.ttest_ind(values_1, values_2)

            records.append({
                'Comparison': f"{comparison[0]} vs {comparison[1]} ({column})-{column_dep}",
                'mean_value_1': mean_value_1,
                'mean_value_2': mean_value_2,
                'T-statistic': t_statistic,
                'P-value': p_value,
            })

result_df = pd.DataFrame(records, columns=result_columns)

# Drop the rows for combinations that could not be tested (NaN entries).
# The surviving rows keep their original positional labels.
result_df = result_df.dropna()
# Show the result table.
print(result_df)

               Comparison  mean_value_1  mean_value_2  T-statistic   P-value
0  A vs B (group1)-value1         21.75          28.0    -4.066250  0.004772
1  A vs B (group1)-value2        121.75         128.0    -4.066250  0.004772
6  C vs D (group2)-value1         28.25          22.8     2.833457  0.025281
7  C vs D (group2)-value2        128.25         122.8     2.833457  0.025281


  result_df = pd.concat([result_df, pd.DataFrame({


# 单因素方差分析

In [63]:
import pandas as pd
from scipy.stats import f_oneway

# Sample data: three categories with three observations each.
data = {
    'category': ['A', 'A', 'A', 'B', 'B', 'B', 'C', 'C', 'C'],
    'value': [23, 21, 18, 25, 27, 29, 30, 28, 26]
}

df = pd.DataFrame(data)

# Run the one-way ANOVA. groupby(sort=False) yields one Series of values per
# category, in order of first appearance, without a boolean mask per category.
groups = [values for _, values in df.groupby('category', sort=False)['value']]
f_statistic, p_value = f_oneway(*groups)

# Show the test statistics.
print(f"F-statistic: {f_statistic}")
print(f"P-value: {p_value}")

# Significance decision. The message is built from `alpha`, so the reported
# level cannot drift from the threshold actually used. A non-significant
# result only means the null hypothesis cannot be rejected; it is never
# "accepted".
alpha = 0.05
if p_value < alpha:
    conclusion = f"在显著性水平为 {alpha} 下，拒绝原假设"
else:
    conclusion = f"在显著性水平为 {alpha} 下，不能拒绝原假设"
print(conclusion)


F-statistic: 9.93023255813954
P-value: 0.012489491482224382
在显著性水平为 0.05 下，拒绝原假设


In [65]:
import pandas as pd
from scipy.stats import f_oneway

# Sample data: three categories with three observations each.
data = {
    'category': ['A', 'A', 'A', 'B', 'B', 'B', 'C', 'C', 'C'],
    'value': [23, 21, 18, 25, 27, 29, 30, 28, 26]
}

df = pd.DataFrame(data)

# Run the one-way ANOVA.
# `groups` holds one Series of values per category (order of first
# appearance). `*groups` unpacks that list so each Series is passed to
# f_oneway as its own positional argument, i.e. f_oneway(g_a, g_b, g_c).
groups = [values for _, values in df.groupby('category', sort=False)['value']]
f_statistic, p_value = f_oneway(*groups)

# Show the test statistics.
print(f"F-statistic: {f_statistic}")
print(f"P-value: {p_value}")

# Significance decision. The reported level is taken from `alpha` rather
# than hard-coded in the text. When the test is not significant, the null
# hypothesis is "not rejected", not "accepted".
alpha = 0.05
if p_value < alpha:
    conclusion = f"在显著性水平为 {alpha} 下，拒绝原假设"
else:
    conclusion = f"在显著性水平为 {alpha} 下，不能拒绝原假设"
print(conclusion)


F-statistic: 9.93023255813954
P-value: 0.012489491482224382
在显著性水平为 0.05 下，拒绝原假设
