## 预处理数据集A

In [6]:
import pandas as pd
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

# 读取 Excel 文件
df = pd.read_excel('rawdata/数据集A.xlsx')

# 仅指定要处理的行为变量
behavior_variables = [
    'stroop_incongruent_rt', 'stroop_interference_effect_rt', 'nogo_acc', 
    'switch_cost', 'rm_1750_acc', 'rm_750_acc', 'dsbt_span'
]

# 提取要处理的数据
behavior_data = df[behavior_variables]

# 处理前缺失值统计
missing_values_before = behavior_data.isna().sum()
print("缺失值填充前:")
print(missing_values_before)

# 使用 IterativeImputer 进行多重插补（MICE）
imputer = IterativeImputer(
    random_state=42, 
    max_iter=10,  # 增加最大迭代次数
    tol=1e-3,     # 调整收敛容忍度
    verbose=2     # 显示填充进度
)
imputed_behavior_data = imputer.fit_transform(behavior_data)

# 将填充后的数据转换回 DataFrame
behavior_df = pd.DataFrame(imputed_behavior_data, columns=behavior_variables)

# 仅更新处理过的字段，保持其他字段不变
df.update(behavior_df)

# 处理后缺失值统计
missing_values_after = df[behavior_variables].isna().sum()
print("缺失值填充后:")
print(missing_values_after)

# 保存处理后的数据
output_path = '预处理后的数据集A.xlsx'
df.to_excel(output_path, index=False)

print(f"数据处理完成，已保存至 {output_path}")

缺失值填充前:
stroop_incongruent_rt            0
stroop_interference_effect_rt    4
nogo_acc                         0
switch_cost                      1
rm_1750_acc                      8
rm_750_acc                       1
dsbt_span                        0
dtype: int64
[IterativeImputer] Completing matrix with shape (364, 7)
[IterativeImputer] Ending imputation round 1/10, elapsed time 0.00
[IterativeImputer] Change: 337.1600486648706, scaled tolerance: 1.36346256410256 
[IterativeImputer] Ending imputation round 2/10, elapsed time 0.01
[IterativeImputer] Change: 1.0661398149929937, scaled tolerance: 1.36346256410256 
[IterativeImputer] Early stopping criterion reached.
缺失值填充后:
stroop_incongruent_rt            0
stroop_interference_effect_rt    0
nogo_acc                         0
switch_cost                      0
rm_1750_acc                      0
rm_750_acc                       0
dsbt_span                        0
dtype: int64
数据处理完成，已保存至 预处理后的数据集A.xlsx


## 预处理数据集B


In [1]:
import pandas as pd
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

# 读取 Excel 文件
df = pd.read_excel('rawdata/数据集B前测.xlsx')

# 仅指定要处理的行为变量
behavior_variables = [
    'stroop_incongruent_rt', 'stroop_interference_effect_rt', 'nogo_acc', 
    'switch_cost', 'rm_1750_acc', 'rm_750_acc', 'dsbt_span'
]


# 提取要处理的数据
behavior_data = df[behavior_variables]

# 处理前缺失值统计
missing_values_before = behavior_data.isna().sum()
print("缺失值填充前:")
print(missing_values_before)

# 使用 IterativeImputer 进行多重插补（MICE）
imputer = IterativeImputer(
    random_state=42, 
    max_iter=10,  # 增加最大迭代次数
    tol=1e-3,     # 调整收敛容忍度
    verbose=2     # 显示填充进度
)
imputed_behavior_data = imputer.fit_transform(behavior_data)

# 将填充后的数据转换回 DataFrame
behavior_df = pd.DataFrame(imputed_behavior_data, columns=behavior_variables)

# 仅更新处理过的字段，保持其他字段不变
df.update(behavior_df)

# 处理后缺失值统计
missing_values_after = df[behavior_variables].isna().sum()
print("缺失值填充后:")
print(missing_values_after)

# 保存处理后的数据
output_path = '预处理后的数据集B前测.xlsx'
df.to_excel(output_path, index=False)

print(f"数据处理完成，已保存至 {output_path}")


缺失值填充前:
stroop_incongruent_rt            1
stroop_interference_effect_rt    0
nogo_acc                         0
switch_cost                      0
rm_1750_acc                      1
rm_750_acc                       0
dsbt_span                        0
dtype: int64
[IterativeImputer] Completing matrix with shape (94, 7)
[IterativeImputer] Ending imputation round 1/10, elapsed time 0.01
[IterativeImputer] Change: 40.941604742338086, scaled tolerance: 1.1744642857142857 
[IterativeImputer] Ending imputation round 2/10, elapsed time 0.02
[IterativeImputer] Change: 0.0002507349138340942, scaled tolerance: 1.1744642857142857 
[IterativeImputer] Early stopping criterion reached.
缺失值填充后:
stroop_incongruent_rt            0
stroop_interference_effect_rt    0
nogo_acc                         0
switch_cost                      0
rm_1750_acc                      0
rm_750_acc                       0
dsbt_span                        0
dtype: int64
数据处理完成，已保存至 预处理后的数据集B前测.xlsx


In [2]:
import pandas as pd
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

# 读取 Excel 文件
df = pd.read_excel('rawdata/数据集B后测.xlsx')

# 仅指定要处理的行为变量
behavior_variables = [
    'stroop_incongruent_rt', 'stroop_interference_effect_rt', 'nogo_acc', 
    'switch_cost', 'rm_1750_acc', 'rm_750_acc', 'dsbt_span'
]


# 提取要处理的数据
behavior_data = df[behavior_variables]

# 处理前缺失值统计
missing_values_before = behavior_data.isna().sum()
print("缺失值填充前:")
print(missing_values_before)

# 使用 IterativeImputer 进行多重插补（MICE）
imputer = IterativeImputer(
    random_state=42, 
    max_iter=10,  # 增加最大迭代次数
    tol=1e-3,     # 调整收敛容忍度
    verbose=2     # 显示填充进度
)
imputed_behavior_data = imputer.fit_transform(behavior_data)

# 将填充后的数据转换回 DataFrame
behavior_df = pd.DataFrame(imputed_behavior_data, columns=behavior_variables)

# 仅更新处理过的字段，保持其他字段不变
df.update(behavior_df)

# 处理后缺失值统计
missing_values_after = df[behavior_variables].isna().sum()
print("缺失值填充后:")
print(missing_values_after)

# 保存处理后的数据
output_path = '预处理后的数据集B后测.xlsx'
df.to_excel(output_path, index=False)

print(f"数据处理完成，已保存至 {output_path}")


缺失值填充前:
stroop_incongruent_rt            0
stroop_interference_effect_rt    0
nogo_acc                         0
switch_cost                      0
rm_1750_acc                      2
rm_750_acc                       1
dsbt_span                        3
dtype: int64
[IterativeImputer] Completing matrix with shape (94, 7)
[IterativeImputer] Ending imputation round 1/10, elapsed time 0.00
[IterativeImputer] Change: 0.787441138087691, scaled tolerance: 1.19425 
[IterativeImputer] Early stopping criterion reached.
缺失值填充后:
stroop_incongruent_rt            0
stroop_interference_effect_rt    0
nogo_acc                         0
switch_cost                      0
rm_1750_acc                      0
rm_750_acc                       0
dsbt_span                        0
dtype: int64
数据处理完成，已保存至 预处理后的数据集B后测.xlsx


## 基于HC矫正

In [3]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

# 读取Excel文件
df = pd.read_excel('预处理后的数据集A.xlsx')

# 统一列名格式（小写，去空格，替换符号）
df.columns = df.columns.str.strip().str.lower().str.replace('-', '_').str.replace(',', '')

# 打印实际的列名，确保匹配
print("Excel 文件的列名:", df.columns.tolist())

# 重新定义 behavior_variables，确保与实际列名匹配
behavior_variables = [
    'stroop_incongruent_rt', 'stroop_interference_effect_rt', 'nogo_acc',
    'switch_cost', 'rm_1750_acc', 'rm_750_acc', 'dsbt_span'
]

# 确保 behavior_variables 在数据集中存在
missing_columns = [col for col in behavior_variables if col not in df.columns]
if missing_columns:
    raise KeyError(f"以下列在数据集中未找到，请检查拼写或格式: {missing_columns}")

# 统一 Group 字段大小写
df['group'] = pd.to_numeric(df['group'], errors='coerce')  # 转换为数值型
df.dropna(subset=['group'], inplace=True)  # 删除无效数据

# 根据 Group 字段筛选数据
patients_df = df[df['group'] == 1].copy()  # SCZ 组
control_df = df[df['group'] == 2].copy()  # HC 组

# 确保数据集不为空
if patients_df.empty or control_df.empty:
    raise ValueError("筛选出的患者组或对照组为空，请检查 Group 字段的值是否正确！")

# 定义其他变量
continuous_variables = ['age', 'education_years']
categorical_variables = ['gender']

# One-hot encode categorical variables
def one_hot_encode(df, categorical_vars):
    for var in categorical_vars:
        if var in df.columns:
            dummies = pd.get_dummies(df[var], prefix=var, drop_first=True)
            df = pd.concat([df, dummies], axis=1)
            df.drop(var, axis=1, inplace=True)
    return df

patients_df = one_hot_encode(patients_df, categorical_variables)
control_df = one_hot_encode(control_df, categorical_variables)

# 标准化连续变量
scaler = StandardScaler()
control_df[continuous_variables] = scaler.fit_transform(control_df[continuous_variables])
patients_df[continuous_variables] = scaler.transform(patients_df[continuous_variables])

# 从对照组DataFrame中提取行为变量数据
control_data = control_df[behavior_variables]

# 使用对照组数据拟合 StandardScaler
scaler = StandardScaler()
scaler.fit(control_data)

# 使用对照组的标准化参数标准化患者组数据
standardized_patient_data = scaler.transform(patients_df[behavior_variables])
standardized_patient_df = pd.DataFrame(standardized_patient_data, columns=behavior_variables)

# 处理 Healthy Controls 组
standardized_control_data = scaler.transform(control_df[behavior_variables])
standardized_control_df = pd.DataFrame(standardized_control_data, columns=behavior_variables)

# 从数据框中获取所有人口学变量（已独热编码和标准化）
demographic_variables = [col for col in control_df.columns if col not in behavior_variables]

# 使用对照组数据拟合回归模型校正人口学效应
corrected_patient_data = pd.DataFrame()
corrected_control_data = pd.DataFrame()

for variable in behavior_variables:
    model = LinearRegression()
    model.fit(control_df[demographic_variables], control_df[variable])
    correction_patients = model.predict(patients_df[demographic_variables])
    correction_controls = model.predict(control_df[demographic_variables])
    corrected_patient_data[variable] = standardized_patient_df[variable] - correction_patients
    corrected_control_data[variable] = standardized_control_df[variable] - correction_controls

# 保存患者组和健康对照组的校正并标准化后的数据
corrected_patient_df = pd.DataFrame(corrected_patient_data)
corrected_control_df = pd.DataFrame(corrected_control_data)

corrected_patient_df.to_excel('./table/校正并标准化后的患者行为变量数据-数据集A.xlsx', index=False)
corrected_control_df.to_excel('./table/校正并标准化后的健康对照行为变量数据-数据集A.xlsx', index=False)

# 打印校正并标准化后的DataFrame
print(corrected_patient_df.head())
print(corrected_control_df.head())

print("校正并标准化完成，数据已保存到 './table/校正并标准化后的患者行为变量数据-数据集A.xlsx' 和 './table/校正并标准化后的健康对照行为变量数据-数据集A.xlsx'.")

# 计算校正并标准化后的均值和标准差
corrected_patient_means = corrected_patient_df.mean().round(2)
corrected_patient_stds = corrected_patient_df.std().round(2)
corrected_control_means = corrected_control_df.mean().round(2)
corrected_control_stds = corrected_control_df.std().round(2)

# 创建一个DataFrame来存储结果，以“均值±标准差”格式显示
summary_df = pd.DataFrame({
    'Corrected and Standardized Patient': corrected_patient_means.astype(str) + ' ± ' + corrected_patient_stds.astype(str),
    'Corrected and Standardized Control': corrected_control_means.astype(str) + ' ± ' + corrected_control_stds.astype(str)
}, index=behavior_variables)

# 打印结果
print(summary_df)

# 保存结果为Excel文件
summary_df.to_excel('./table/校正并标准化后的行为变量均值和标准差-数据集A.xlsx')

print("校正并标准化后的均值和标准差已保存到 './table/校正并标准化后的行为变量均值和标准差-数据集A.xlsx'.")


Excel 文件的列名: ['id', 'stroop_incongruent_rt', 'stroop_interference_effect_rt', 'nogo_acc', 'switch_cost', 'rm_1750_acc', 'rm_750_acc', 'dsbt_span', 'group', 'age', 'gender', 'education_years', 'dose equivalent to olanzapine']
   stroop_incongruent_rt  stroop_interference_effect_rt  nogo_acc  \
0            -738.242132                     735.613724 -0.655208   
1            -731.331369                     727.396617 -1.866666   
2            -737.677914                     732.902322 -1.586574   
3            -746.307821                     740.546734 -1.031932   
4            -735.930451                     729.185446 -2.799495   

   switch_cost  rm_1750_acc  rm_750_acc  dsbt_span  
0  -303.808399    -1.585067   -2.048285  -6.476738  
1  -285.191406     0.539985    0.791846  -5.722215  
2  -303.063392    -2.752286   -3.008174  -6.974526  
3  -285.788167    -1.405756   -3.196904  -6.491598  
4  -290.812975    -1.595482   -2.827024  -6.887146  
   stroop_incongruent_rt  stroop_interfere

## 校正数据集B前测

In [2]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

# 读取Excel文件
patients_df = pd.read_excel('预处理后的数据集B前测.xlsx')  # SCZ 组
df = pd.read_excel('预处理后的数据集A.xlsx')  # 原始数据集

# 统一列名格式（小写，去空格，替换符号）
def clean_columns(df):
    df.columns = df.columns.str.strip().str.lower().str.replace('-', '_').str.replace(',', '')
    return df

patients_df = clean_columns(patients_df)
df = clean_columns(df)

# 筛选健康对照组（Group = 2）
control_df = df[df['group'] == 2].copy()

# 打印实际的列名，确保匹配
print("SCZ 数据集的列名:", patients_df.columns.tolist())
print("HC 数据集的列名:", control_df.columns.tolist())

# 重新定义 behavior_variables，确保与实际列名匹配
behavior_variables = [
    'stroop_incongruent_rt', 'stroop_interference_effect_rt', 'nogo_acc',
    'switch_cost', 'rm_1750_acc', 'rm_750_acc', 'dsbt_span'
]

# 确保 behavior_variables 在数据集中存在
def check_columns(df, dataset_name):
    missing_columns = [col for col in behavior_variables if col not in df.columns]
    if missing_columns:
        raise KeyError(f"{dataset_name} 数据集中缺少以下列，请检查拼写或格式: {missing_columns}")

check_columns(patients_df, "SCZ")
check_columns(control_df, "HC")

# 统一 Group 字段大小写
for df in [patients_df, control_df]:
    df['group'] = pd.to_numeric(df['group'], errors='coerce')
    df.dropna(subset=['group'], inplace=True)

# 确保数据集不为空
if patients_df.empty or control_df.empty:
    raise ValueError("筛选出的患者组或对照组为空，请检查 Group 字段的值是否正确！")

# 定义其他变量
continuous_variables = ['age', 'education_years']
categorical_variables = ['gender']

# One-hot encode categorical variables
def one_hot_encode(df, categorical_vars):
    for var in categorical_vars:
        if var in df.columns:
            dummies = pd.get_dummies(df[var], prefix=var, drop_first=True)
            df = pd.concat([df, dummies], axis=1)
            df.drop(var, axis=1, inplace=True)
    return df

patients_df = one_hot_encode(patients_df, categorical_variables)
control_df = one_hot_encode(control_df, categorical_variables)

# 标准化连续变量
scaler = StandardScaler()
control_df[continuous_variables] = scaler.fit_transform(control_df[continuous_variables])
patients_df[continuous_variables] = scaler.transform(patients_df[continuous_variables])

# 从对照组DataFrame中提取行为变量数据
control_data = control_df[behavior_variables]

# 使用对照组数据拟合 StandardScaler
scaler = StandardScaler()
scaler.fit(control_data)

# 使用对照组的标准化参数标准化患者组数据
standardized_patient_data = scaler.transform(patients_df[behavior_variables])
standardized_patient_df = pd.DataFrame(standardized_patient_data, columns=behavior_variables)

# 处理 Healthy Controls 组
standardized_control_data = scaler.transform(control_df[behavior_variables])
standardized_control_df = pd.DataFrame(standardized_control_data, columns=behavior_variables)

# 从数据框中获取所有人口学变量（已独热编码和标准化）
demographic_variables = [col for col in control_df.columns if col not in behavior_variables]

# 使用对照组数据拟合回归模型校正人口学效应
corrected_patient_data = pd.DataFrame()
corrected_control_data = pd.DataFrame()

for variable in behavior_variables:
    model = LinearRegression()
    model.fit(control_df[demographic_variables], control_df[variable])
    correction_patients = model.predict(patients_df[demographic_variables])
    correction_controls = model.predict(control_df[demographic_variables])
    corrected_patient_data[variable] = standardized_patient_df[variable] - correction_patients
    corrected_control_data[variable] = standardized_control_df[variable] - correction_controls

# 保存患者组和健康对照组的校正并标准化后的数据
corrected_patient_df = pd.DataFrame(corrected_patient_data)
corrected_control_df = pd.DataFrame(corrected_control_data)

corrected_patient_df.to_excel('./table/校正并标准化后的患者行为变量数据-数据集B前测.xlsx', index=False)
corrected_control_df.to_excel('./table/校正并标准化后的健康对照行为变量数据-数据集A.xlsx', index=False)

# 打印校正并标准化后的DataFrame
print(corrected_patient_df.head())
print(corrected_control_df.head())

print("校正并标准化完成，数据已保存到 './table/校正并标准化后的患者行为变量数据-数据集B前测.xlsx' 和 './table/校正并标准化后的健康对照行为变量数据-数据集A.xlsx'.")

# 计算校正并标准化后的均值和标准差
corrected_patient_means = corrected_patient_df.mean().round(2)
corrected_patient_stds = corrected_patient_df.std().round(2)
corrected_control_means = corrected_control_df.mean().round(2)
corrected_control_stds = corrected_control_df.std().round(2)

# 创建一个DataFrame来存储结果，以“均值±标准差”格式显示
summary_df = pd.DataFrame({
    'Corrected and Standardized Patient': corrected_patient_means.astype(str) + ' ± ' + corrected_patient_stds.astype(str),
    'Corrected and Standardized Control': corrected_control_means.astype(str) + ' ± ' + corrected_control_stds.astype(str)
}, index=behavior_variables)

# 打印结果
print(summary_df)

# 保存结果为Excel文件
summary_df.to_excel('./table/校正并标准化后的行为变量均值和标准差-数据集B前测.xlsx')

print("校正并标准化后的均值和标准差已保存到 './table/校正并标准化后的行为变量均值和标准差-数据集B前测.xlsx'.")

SCZ 数据集的列名: ['id', 'group', 'stroop_incongruent_rt', 'stroop_interference_effect_rt', 'nogo_acc', 'switch_cost', 'rm_1750_acc', 'rm_750_acc', 'dsbt_span', 'age', 'education_years', 'gender']
HC 数据集的列名: ['id', 'stroop_incongruent_rt', 'stroop_interference_effect_rt', 'nogo_acc', 'switch_cost', 'rm_1750_acc', 'rm_750_acc', 'dsbt_span', 'group', 'panss_n', 'panss_p', 'panss_gp', 'dose equivalent to olanzapine', 'age', 'gender', 'education_years', 'panss_t']


KeyError: "['panss_n', 'panss_p', 'panss_gp', 'dose equivalent to olanzapine', 'panss_t'] not in index"

## 数据集B后测

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

# 读取Excel文件
patients_df = pd.read_excel('预处理后的数据集B后测.xlsx')  # SCZ 组
df = pd.read_excel('预处理后的数据集A.xlsx')  # 原始数据集

# 统一列名格式（小写，去空格，替换符号）
def clean_columns(df):
    df.columns = df.columns.str.strip().str.lower().str.replace('-', '_').str.replace(',', '')
    return df

patients_df = clean_columns(patients_df)
df = clean_columns(df)

# 筛选健康对照组（Group = 2）
control_df = df[df['group'] == 2].copy()

# 打印实际的列名，确保匹配
print("SCZ 数据集的列名:", patients_df.columns.tolist())
print("HC 数据集的列名:", control_df.columns.tolist())

# 重新定义 behavior_variables，确保与实际列名匹配
behavior_variables = [
    'stroop_incongruent_rt', 'stroop_interference_effect_rt', 'nogo_acc',
    'switch_cost', 'rm_1750_acc', 'rm_750_acc', 'dsbt_span'
]

# 确保 behavior_variables 在数据集中存在
def check_columns(df, dataset_name):
    missing_columns = [col for col in behavior_variables if col not in df.columns]
    if missing_columns:
        raise KeyError(f"{dataset_name} 数据集中缺少以下列，请检查拼写或格式: {missing_columns}")

check_columns(patients_df, "SCZ")
check_columns(control_df, "HC")

# 统一 Group 字段大小写
for df in [patients_df, control_df]:
    df['group'] = pd.to_numeric(df['group'], errors='coerce')
    df.dropna(subset=['group'], inplace=True)

# 确保数据集不为空
if patients_df.empty or control_df.empty:
    raise ValueError("筛选出的患者组或对照组为空，请检查 Group 字段的值是否正确！")

# 定义其他变量
continuous_variables = ['age', 'education_years']
categorical_variables = ['gender']

# One-hot encode categorical variables
def one_hot_encode(df, categorical_vars):
    for var in categorical_vars:
        if var in df.columns:
            dummies = pd.get_dummies(df[var], prefix=var, drop_first=True)
            df = pd.concat([df, dummies], axis=1)
            df.drop(var, axis=1, inplace=True)
    return df

patients_df = one_hot_encode(patients_df, categorical_variables)
control_df = one_hot_encode(control_df, categorical_variables)

# 标准化连续变量
scaler = StandardScaler()
control_df[continuous_variables] = scaler.fit_transform(control_df[continuous_variables])
patients_df[continuous_variables] = scaler.transform(patients_df[continuous_variables])

# 从对照组DataFrame中提取行为变量数据
control_data = control_df[behavior_variables]

# 使用对照组数据拟合 StandardScaler
scaler = StandardScaler()
scaler.fit(control_data)

# 使用对照组的标准化参数标准化患者组数据
standardized_patient_data = scaler.transform(patients_df[behavior_variables])
standardized_patient_df = pd.DataFrame(standardized_patient_data, columns=behavior_variables)

# 处理 Healthy Controls 组
standardized_control_data = scaler.transform(control_df[behavior_variables])
standardized_control_df = pd.DataFrame(standardized_control_data, columns=behavior_variables)

# 从数据框中获取所有人口学变量（已独热编码和标准化）
demographic_variables = [col for col in control_df.columns if col not in behavior_variables]

# 使用对照组数据拟合回归模型校正人口学效应
corrected_patient_data = pd.DataFrame()
corrected_control_data = pd.DataFrame()

for variable in behavior_variables:
    model = LinearRegression()
    model.fit(control_df[demographic_variables], control_df[variable])
    correction_patients = model.predict(patients_df[demographic_variables])
    correction_controls = model.predict(control_df[demographic_variables])
    corrected_patient_data[variable] = standardized_patient_df[variable] - correction_patients
    corrected_control_data[variable] = standardized_control_df[variable] - correction_controls

# 保存患者组和健康对照组的校正并标准化后的数据
corrected_patient_df = pd.DataFrame(corrected_patient_data)
corrected_control_df = pd.DataFrame(corrected_control_data)

corrected_patient_df.to_excel('./table/校正并标准化后的患者行为变量数据-数据集B后测.xlsx', index=False)
corrected_control_df.to_excel('./table/校正并标准化后的健康对照行为变量数据-数据集A.xlsx', index=False)

# 打印校正并标准化后的DataFrame
print(corrected_patient_df.head())
print(corrected_control_df.head())

print("校正并标准化完成，数据已保存到 './table/校正并标准化后的患者行为变量数据-数据集B后测.xlsx' 和 './table/校正并标准化后的健康对照行为变量数据-数据集A.xlsx'.")

# 计算校正并标准化后的均值和标准差
corrected_patient_means = corrected_patient_df.mean().round(2)
corrected_patient_stds = corrected_patient_df.std().round(2)
corrected_control_means = corrected_control_df.mean().round(2)
corrected_control_stds = corrected_control_df.std().round(2)

# 创建一个DataFrame来存储结果，以“均值±标准差”格式显示
summary_df = pd.DataFrame({
    'Corrected and Standardized Patient': corrected_patient_means.astype(str) + ' ± ' + corrected_patient_stds.astype(str),
    'Corrected and Standardized Control': corrected_control_means.astype(str) + ' ± ' + corrected_control_stds.astype(str)
}, index=behavior_variables)

# 打印结果
print(summary_df)

# 保存结果为Excel文件
summary_df.to_excel('./table/校正并标准化后的行为变量均值和标准差-数据集B后测.xlsx')

print("校正并标准化后的均值和标准差已保存到 './table/校正并标准化后的行为变量均值和标准差-数据集B后测.xlsx'.")

SCZ 数据集的列名: ['id', 'group', 'stroop_incongruent_rt', 'stroop_interference_effect_rt', 'nogo_acc', 'switch_cost', 'rm_1750_acc', 'rm_750_acc', 'dsbt_span', 'age', 'education_years', 'gender']
HC 数据集的列名: ['id', 'stroop_incongruent_rt', 'stroop_interference_effect_rt', 'nogo_acc', 'switch_cost', 'rm_1750_acc', 'rm_750_acc', 'dsbt_span', 'group', 'age', 'gender', 'education_years']
   stroop_incongruent_rt  stroop_interference_effect_rt  nogo_acc  \
0            -752.351951                     740.531060 -1.028934   
1            -766.634835                     747.550381 -0.385461   
2            -754.255204                     734.327982 -0.751407   
3            -750.947657                     731.416278 -1.400554   
4            -766.432796                     734.028803 -0.570903   

   switch_cost  rm_1750_acc  rm_750_acc  dsbt_span  
0  -315.714885    -0.409673   -2.009067  -3.814494  
1  -265.925093     0.129953    0.264539  -6.602839  
2  -292.207685     0.153232    0.076111  -5.9