In [16]:
# 导入必要的库
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages

# 数据路径
data_path = r"D:\data\data03.csv"  # 数据文件路径
output_pdf_path = r"D:\data\anova_results.pdf"  # 输出 PDF 文件路径

# 尝试读取数据，指定不同的编码格式
try:
    # 尝试使用 UTF-8 编码
    data = pd.read_csv(data_path, encoding="utf-8")
    print("使用 UTF-8 编码加载成功！")
except UnicodeDecodeError:
    try:
        # 尝试使用 GBK 编码
        data = pd.read_csv(data_path, encoding="gbk")
        print("使用 GBK 编码加载成功！")
    except UnicodeDecodeError:
        try:
            # 尝试使用 ISO-8859-1 编码
            data = pd.read_csv(data_path, encoding="iso-8859-1")
            print("使用 ISO-8859-1 编码加载成功！")
        except UnicodeDecodeError:
            print("无法确定文件编码，请检查文件格式或手动指定编码。")
            data = None

# 如果数据加载成功，继续后续操作
if data is not None:
    # 查看数据的前几行，确认数据加载正确
    print("\n数据的前几行：")
    print(data.head())

    # 清理列名：将特殊字符替换为下划线，并移除空格
    data.columns = data.columns.str.replace('[ /%+-]', '_', regex=True).str.strip()
    data.columns = data.columns.str.replace('[°Ω]', 'X', regex=True)  # 替换特殊字符为合法字符
    data.columns = data.columns.str.replace(r'^\d+', 'Var_', regex=True)  # 以数字开头的列名前加 "Var_"

    # 确定控制变量和响应变量的列名
    control_columns = data.columns[:11]  # 前11列是控制变量
    response_columns = data.columns[11:]  # 后9列是响应变量

    print("\n清理后的控制变量列名：", control_columns)
    print("清理后的响应变量列名：", response_columns)

    # 创建 PDF 文件
    with PdfPages(output_pdf_path) as pdf:
        # 对每个控制变量和每个响应变量进行方差分析
        for control_var in control_columns:
            for response_var in response_columns:
                formula = f"{response_var} ~ C({control_var})"  # 使用当前控制变量进行分析
                try:
                    model = ols(formula, data=data).fit()
                    anova_results = sm.stats.anova_lm(model, typ=2)
                    f_value = anova_results['F'][0]
                    p_value = anova_results['PR(>F)'][0]

                    # 绘制 F 值和 P 值的条形图
                    plt.figure(figsize=(10, 5))
                    sns.set(style="whitegrid")

                    # 绘制 F 值条形图
                    plt.subplot(1, 2, 1)
                    sns.barplot(x=[f_value], y=[response_var], palette="viridis")
                    plt.title(f"F Value for {response_var} ~ {control_var}")
                    plt.xlabel("F Value")
                    plt.ylabel("Response Variable")

                    # 绘制 P 值条形图
                    plt.subplot(1, 2, 2)
                    sns.barplot(x=[p_value], y=[response_var], palette="magma")
                    plt.title(f"P Value for {response_var} ~ {control_var}")
                    plt.xlabel("P Value")
                    plt.ylabel("Response Variable")

                    # 添加文字说明
                    plt.suptitle(f"ANOVA Results for {response_var} ~ {control_var}", fontsize=16)
                    if p_value < 0.05:
                        plt.figtext(0.5, 0.01, f"Result: {control_var} has a significant effect on {response_var} (P < 0.05)", fontsize=12, ha="center", color="red")
                    else:
                        plt.figtext(0.5, 0.01, f"Result: {control_var} has no significant effect on {response_var} (P >= 0.05)", fontsize=12, ha="center", color="blue")

                    # 调整布局并保存到 PDF
                    plt.tight_layout(rect=[0, 0.05, 1, 0.95])
                    pdf.savefig()  # 保存当前页面到 PDF
                    plt.close()  # 关闭当前图

                except Exception as e:
                    print(f"对 {control_var} 和 {response_var} 进行方差分析时出错：{e}")

    print(f"\n所有分析结果已保存到 PDF 文件中：{output_pdf_path}")

使用 GBK 编码加载成功！

数据的前几行：
   SBET/Specific surface area/m2g-1  Average pore size/nm  \
0                              8.90              1.836667   
1                              6.20              2.866667   
2                              5.30              2.816667   
3                              0.05              0.210000   
4                             22.00              2.600000   

   Total pore volume/cm3g-1         ID         IG  ID/IG       2θ/°   d002/nm  \
0                  5.166667  1352.0000  1566.0000  0.870  26.524000  3.355800   
1                  6.100000  1352.0000  1566.0000  0.920  26.506000  3.360000   
2                  4.483333  1352.0000  1566.0000  0.930  26.508000  3.350700   
3                  4.800000     0.7376     1.0682  0.691  20.942400  0.934868   
4                  5.000000     0.0711     0.8628  0.082  10.969805  1.616398   

   La/nm  Lc/nm     La/Lc  first discharge/mAh g-1  first charge/mAh g-1  \
0  100.0   55.6  1.798561               229.53


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=[f_value], y=[response_var], palette="viridis")

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=[p_value], y=[response_var], palette="magma")

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=[f_value], y=[response_var], palette="viridis")

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=[p_value], y=[response_var], palette="magma")

Passing `palette` without assigning `hue` is deprecated and will be removed in 


所有分析结果已保存到 PDF 文件中：D:\data\anova_results.pdf
