## 中毒患者预测模型
- M1 预测急诊转入院患者是否死亡
- M2 预测急诊转入院患者是否未治愈

## 寻找最优随机种子

In [None]:

''' # 寻找最优随机种子
from tqdm import tqdm
import random

# 生成 10 个不重复的随机种子
seeds = random.sample(range(1, 1000), 10)
print(seeds)
seeds = [725, 181, 206, 693, 616, 679, 102, 355, 770, 174]

def model_10cv_Catboost(X,y,save_path,seed):
    skf = StratifiedKFold(n_splits=5)
    model = CatBoostClassifier(iterations=500, learning_rate=0.01, random_seed=seed)
    t = y.values
    allAUC = []
    allY = []
    allPY = []
    fold = 0
    for train_index, test_index in skf.split(np.zeros(len(t)),t):
        X_train, X_test = X.iloc[train_index,:], X.iloc[test_index,:]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        model.fit(X_train,y_train)
        probs = model.predict_proba(X_test)
        y_tepred = probs[:,1]
        auc_te = metrics.roc_auc_score(y_test, y_tepred)
        print('auc_te = ', auc_te)
        allAUC.append(auc_te)
        allY.extend(y_test)
        allPY.extend(y_tepred)    
    
    meanAUC = np.mean(allAUC)
    print('====Catboost model: meanAUC={}'.format(meanAUC))
    print('====Catboost model: 95% CI=[{}, {}]'.format(np.percentile(allAUC,2.5), np.percentile(allAUC,97.5)))
    print('====Catboost model: meanAUC={}'.format(meanAUC))
    print('====Catboost model: 95% CI=[{}, {}]'.format(np.percentile(allAUC,2.5), np.percentile(allAUC,97.5)))
    predict_result = pd.DataFrame({'y_test':allY,'y_tepred':allPY})
    if os.path.exists(save_path + '/Catboost/')==False:
        os.makedirs(save_path + '/Catboost/')
    # predict_result.to_csv(save_path + f'/Catboost/predict_result_Catboost.csv',index=False)
    return model,predict_result,meanAUC

save_path = '/share/home/zhanxueyun/poisoning_file/raw_res/'
raw_data_path = pwd + 'rawdata_中毒数据_patient_last_1114.xlsx'
## 全部患者，20个特征，原始特征名
x_features_categorical_raw = ['Type of Poisoning','Degree of poisoning','Altered Consciousness or Syncope','Education Level']
x_features_continuous_raw = ['White Blood Cell Count','Alanine Aminotransferase (ALT)','Length of Stay','Total Bilirubin','Creatine Kinase-MB Isoenzyme','Age','Urea','Lactate Dehydrogenase (LDH)','Uric Acid','Albumin (First Measurement)','Albumin (Last Measurement)','Homocysteine','Hemoglobin Concentration','Direct Bilirubin','Creatine Kinase (CK)','Red Blood Cell Count','Mean Corpuscular Hemoglobin Concentration']
## 全部患者，20个特征，新的特征名
x_features_categorical_re = ['Type of Poisoning','Degree of poisoning','AMS or Sync','Education Level']
x_features_continuous_re = ['WBC','ALT','Length of Stay','Total Bilirubin','CK-MB','Age','Urea','LDH','Uric Acid','Alt-first','Alt-last','Homocysteine','Hgb','Direct Bilirubin','Creatine Kinase (CK)','RBC','MCHC']
# 数据处理   'Outcome_other' 是否死亡; 'Outcome'  是否未治愈
datax_miss, datay = prepare_data(raw_data_path , x_features_continuous_raw, x_features_categorical_raw, 'Outcome_other', False)  # 缺失值填充
datax_miss.columns = x_features_continuous_re + x_features_categorical_re
datax_miss[x_features_categorical_re] = datax_miss[x_features_categorical_re].astype('int64')
# --------------------------
# 主循环（带进度条）
# --------------------------
results = []
for seed in tqdm(seeds, desc="Processing seeds"):
    model, predict_result, meanAUC = model_10cv_Catboost(datax_miss, datay, save_path, seed)
    results.append({"Seed": seed, "AUROC": meanAUC})
# 输出结果
results_df = pd.DataFrame(results)
results_df.to_csv(save_path + 'seed_auc_1.csv')
print(results_df)
print(f"平均AUROC: {results_df['AUROC'].mean():.4f}")
'''


## M1（死亡模型）

In [14]:

import numpy as np
import pandas as pd
from sklearn import metrics
from matplotlib.font_manager import FontProperties
#import shap
import joblib
from sklearn.model_selection import StratifiedKFold
# import xgboost
import os
from catboost import *
import warnings
warnings.filterwarnings("ignore")

# WenQuanYi Zen Hei font file (presumably for Chinese plot labels; it is not
# referenced anywhere else in this cell).
font = FontProperties(fname='/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc')

# Excel workbook holding the raw patient records.
raw_data_path = 'rawdata_中毒数据_patient_last_1114.xlsx'

# Root directory for every artefact produced by the M1 cell.
save_path = 'M1_compare/'
os.makedirs(save_path, exist_ok=True)

# Module-level log handle used by model_10cv_Catboost() through the global
# name `log`. Mode "a+" creates the file when it is missing, so no separate
# "touch" step is needed. The handle is closed at the end of the cell.
log = open(os.path.join(save_path, 'models_compare_results.txt'), mode="a+", encoding="utf-8")

## Missing-value imputation helper
def data_fillna(X, x_categorical, x_continuous):
    """Impute missing values in the listed columns and return the frame.

    The frame is modified in place; the same object is returned so the call
    can be used in an assignment.

    Args:
        X: pandas DataFrame containing every column named below.
        x_categorical: Names of categorical columns. NaN is replaced by 0 and
            the column is cast to ``int64``.
        x_continuous: Names of continuous columns. NaN is replaced by the
            column median (``Series.median`` skips NaN).

    Returns:
        ``X`` with the listed columns overwritten.
    """
    for col in x_categorical:
        X[col] = X[col].fillna(0).astype('int64')
    for col in x_continuous:
        # Assign the filled column back instead of calling
        # ``X[col].fillna(..., inplace=True)``: the in-place form operates on
        # a temporary Series and is a silent no-op under pandas Copy-on-Write.
        X[col] = X[col].fillna(X[col].median())
    return X
## Dataset preparation helper
def prepare_data(raw_data_path, x_continuous, x_categorical, y_column, isnan_flag=True, type_flag=None):
    """Load the workbook, shuffle the rows and split into features and target.

    Args:
        raw_data_path: Path of the Excel file, read with ``pd.read_excel``.
        x_continuous: Names of the continuous feature columns.
        x_categorical: Names of the categorical feature columns.
        y_column: Name of the target column.
        isnan_flag: When True, NaN is *kept* in the continuous columns and
            only the categorical columns are filled with 0 and cast to
            ``int64``. When False, ``data_fillna`` is applied, which
            additionally fills the continuous columns with their median.
        type_flag: Optional value of the ``'Type of Poisoning'`` column used
            to restrict the rows. Any falsy value (the default ``None``)
            disables the filter.

    Returns:
        Tuple ``(datax, datay)``: an independent feature DataFrame whose
        columns are ordered continuous first, then categorical, and the
        target Series.
    """
    rawdata = pd.read_excel(raw_data_path)
    # Deterministic shuffle of all rows (fixed random_state).
    rawdata = rawdata.sample(frac=1, random_state=42)
    if type_flag:
        # Copy so the column assignments below write to an independent frame
        # rather than to a filtered slice.
        rawdata = rawdata[rawdata['Type of Poisoning'] == type_flag].copy()
    if isnan_flag:
        for col in x_categorical:
            rawdata[col] = rawdata[col].fillna(0).astype('int64')
    else:
        # NOTE(review): medians are computed over all rows before any
        # train/test split happens downstream.
        rawdata = data_fillna(rawdata, x_categorical, x_continuous)
    # Return a copy: callers assign into the feature frame afterwards, which
    # would otherwise be a chained assignment on a slice of ``rawdata``.
    datax = rawdata[list(x_continuous) + list(x_categorical)].copy()
    datay = rawdata[y_column]
    return datax, datay

def model_10cv_Catboost(X,y,save_path,seed):
    """Cross-validate a CatBoost classifier and persist the per-fold results.

    Despite the ``10cv`` in the name, the split is a stratified **5**-fold
    ``StratifiedKFold`` without shuffling.

    Side effects:
        * every fold's fitted model is dumped with joblib to
          ``<save_path>/models/Catboost_model_fold_<k>.pkl``;
        * a summary is printed to stdout and to the module-level ``log``
          file handle, which must be open;
        * the pooled out-of-fold predictions are written to
          ``<save_path>/Catboost/predict_result_Catboost.csv``.

    Args:
        X: Feature DataFrame, indexed positionally with ``iloc``.
        y: Target Series aligned with ``X``.
        save_path: Output root directory.
        seed: Value passed to CatBoost as ``random_seed``.

    Returns:
        Tuple ``(model, predict_result)``. ``model`` is the single estimator
        object that is re-fitted in every fold, so it ends up trained on the
        training split of the **last** fold only. ``predict_result`` is a
        DataFrame with columns ``y_test`` and ``y_tepred`` pooled over folds.
    """
    skf = StratifiedKFold(n_splits=5)
    # NOTE(review): the calling cell passes a seed it labels "optimal"; if
    # that seed was picked by comparing these same CV AUROCs, the reported
    # score is optimistically biased - confirm.
    model = CatBoostClassifier(iterations=500, learning_rate=0.01, random_seed=seed)

    # Build output locations with os.path.join so they are correct whether
    # or not save_path ends with a separator (and stay relative when it is
    # empty), and create them once instead of on every fold.
    models_dir = os.path.join(save_path, 'models')
    predictions_dir = os.path.join(save_path, 'Catboost')
    os.makedirs(models_dir, exist_ok=True)
    os.makedirs(predictions_dir, exist_ok=True)

    t = y.values
    allAUC = []
    allY = []
    allPY = []
    for fold, (train_index, test_index) in enumerate(skf.split(np.zeros(len(t)), t), start=1):
        X_train, X_test = X.iloc[train_index, :], X.iloc[test_index, :]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        model.fit(X_train, y_train)
        # Second column of predict_proba is used as the positive-class score.
        y_tepred = model.predict_proba(X_test)[:, 1]
        auc_te = metrics.roc_auc_score(y_test, y_tepred)
        print('auc_te = ', auc_te)
        allAUC.append(auc_te)
        allY.extend(y_test)
        allPY.extend(y_tepred)
        # Persist the model fitted for this fold.
        joblib.dump(model, os.path.join(models_dir, f'Catboost_model_fold_{fold}.pkl'))

    meanAUC = np.mean(allAUC)
    # The interval labelled "95% CI" is the 2.5th/97.5th percentile of the
    # five per-fold AUCs, not a bootstrap or analytic confidence interval.
    # The label is kept unchanged so existing log files stay comparable.
    summary_lines = [
        '====Catboost model: meanAUC={}'.format(meanAUC),
        '====Catboost model: 95% CI=[{}, {}]'.format(np.percentile(allAUC, 2.5), np.percentile(allAUC, 97.5)),
    ]
    for line in summary_lines:
        print(line)
    for line in summary_lines:
        print(line, file=log)
    print('====Catboost model: AUCS={}'.format(allAUC), file=log)

    predict_result = pd.DataFrame({'y_test':allY,'y_tepred':allPY})
    predict_result.to_csv(os.path.join(predictions_dir, 'predict_result_Catboost.csv'), index=False)
    return model,predict_result

############################################### Current data-source configuration ##############################################

## All patients, 20 features, original column names
x_features_categorical_raw = ['Type of Poisoning','Degree of poisoning','Altered Consciousness or Syncope','Education Level']
# NOTE(review): 'Length of Stay' is presumably only known at discharge -
# confirm it is a legitimate predictor for this outcome.
x_features_continuous_raw = ['White Blood Cell Count','Alanine Aminotransferase (ALT)','Length of Stay','Total Bilirubin','Creatine Kinase-MB Isoenzyme','Age','Urea','Lactate Dehydrogenase (LDH)','Uric Acid','Albumin (First Measurement)','Homocysteine','Hemoglobin Concentration','Direct Bilirubin','Creatine Kinase (CK)','Red Blood Cell Count','Mean Corpuscular Hemoglobin Concentration']

# Close the shared log handle even when loading or training raises.
try:
    # Data preparation; isnan_flag=False means missing values are imputed.
    datax_miss, datay = prepare_data(
        raw_data_path,
        x_continuous=x_features_continuous_raw,
        x_categorical=x_features_categorical_raw,
        y_column='Outcome_other',
        isnan_flag=False
    )

    # Make sure the categorical columns are integer typed.
    datax_miss[x_features_categorical_raw] = datax_miss[x_features_categorical_raw].astype('int64')

    seed = 616  # seed labelled as the best one by the author
    model_Catboost, predict_result_Catboost = model_10cv_Catboost(datax_miss, datay, save_path, seed)
finally:
    log.close()

0:	learn: 0.6811958	total: 8.61ms	remaining: 4.3s
1:	learn: 0.6712468	total: 9.47ms	remaining: 2.36s
2:	learn: 0.6604511	total: 11.2ms	remaining: 1.85s
3:	learn: 0.6485047	total: 12.9ms	remaining: 1.6s
4:	learn: 0.6378214	total: 14.8ms	remaining: 1.46s
5:	learn: 0.6278582	total: 16.9ms	remaining: 1.39s
6:	learn: 0.6184158	total: 18.6ms	remaining: 1.31s
7:	learn: 0.6085609	total: 20.5ms	remaining: 1.26s
8:	learn: 0.5988066	total: 22.7ms	remaining: 1.24s
9:	learn: 0.5898294	total: 24ms	remaining: 1.18s
10:	learn: 0.5797843	total: 25.7ms	remaining: 1.14s
11:	learn: 0.5695050	total: 27.5ms	remaining: 1.12s
12:	learn: 0.5603987	total: 29.7ms	remaining: 1.11s
13:	learn: 0.5523071	total: 31.9ms	remaining: 1.11s
14:	learn: 0.5440108	total: 45.2ms	remaining: 1.46s
15:	learn: 0.5347530	total: 47.8ms	remaining: 1.45s
16:	learn: 0.5273017	total: 50.1ms	remaining: 1.42s
17:	learn: 0.5188146	total: 52ms	remaining: 1.39s
18:	learn: 0.5102009	total: 53.6ms	remaining: 1.36s
19:	learn: 0.5025502	total: 

### SHAP 分析

In [15]:
import os
import numpy as np
import pandas as pd
import shap
import warnings
warnings.filterwarnings("ignore")
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.cm import get_cmap


def analyze_model_dataset_m1(model, X, save_path, title="(M1) SHAP Feature Importance"):
    """Compute SHAP values for ``model`` on ``X`` and save tables and a plot.

    Files written to ``<save_path>/SHAP``:
        * ``shap_values.csv``        - SHAP value per sample and feature
        * ``shap_data.csv``          - the ``data`` attribute of the SHAP result
        * ``feature_importance.csv`` - mean absolute SHAP value per feature,
          sorted in descending order
        * ``shap_summary_plot.png``  - summary plot

    Args:
        model: Fitted model accepted by ``shap.Explainer``.
        X: Feature DataFrame to explain; its column names label the outputs.
        save_path: Output root directory.
        title: Title drawn on the summary plot.

    Returns:
        Tuple ``(explainer, shap_values, shap_df, feature_importance)``.
    """
    output_dir = os.path.join(save_path, 'SHAP')
    os.makedirs(output_dir, exist_ok=True)

    # 1) Compute SHAP values. Calling the explainer returns an Explanation
    #    object (never a plain list).
    explainer = shap.Explainer(model)
    shap_values = explainer(X)

    # If the explainer produced one output per class (3-D values array),
    # keep the slice for class index 1.
    if np.ndim(shap_values.values) == 3:
        shap_values = shap_values[:, :, 1]

    # 2) Save the SHAP value matrix.
    shap_df = pd.DataFrame(shap_values.values, columns=X.columns)
    shap_df.to_csv(os.path.join(output_dir, 'shap_values.csv'), index=False)

    # 3) Save the input data that corresponds to each SHAP row.
    shap_data_df = pd.DataFrame(shap_values.data, columns=X.columns)
    shap_data_df.to_csv(os.path.join(output_dir, 'shap_data.csv'), index=False)

    # 4) Feature importance = mean absolute SHAP value over samples.
    feature_importance = pd.DataFrame({
        'Feature': X.columns,
        'Importance': np.abs(shap_values.values).mean(axis=0)
    }).sort_values('Importance', ascending=False)
    feature_importance.to_csv(os.path.join(output_dir, 'feature_importance.csv'), index=False)

    # 5) Summary plot. plt.get_cmap is used because matplotlib.cm.get_cmap
    #    is deprecated and removed in newer matplotlib releases.
    cmap = plt.get_cmap('cool')
    plt.figure(figsize=(6, 8))
    try:
        shap.summary_plot(shap_values, X, cmap=cmap, show=False)
        plt.title(title, fontsize=15)
        plt.xlabel("SHAP Value (Impact on model output)", fontsize=12)
        plt.tight_layout(pad=2.0)
        plt.savefig(os.path.join(output_dir, 'shap_summary_plot.png'), dpi=300, bbox_inches='tight')
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close()

    print("✅ SHAP 文件已保存至:", output_dir)
    return explainer, shap_values, shap_df, feature_importance


# ======================================
# Example usage
# ======================================
# Output root of the M1 model.
save_path = 'M1_compare/'
# model_Catboost is the object returned by model_10cv_Catboost(), i.e. the
# estimator as fitted in the last cross-validation fold.
explainer_m1, shap_values_m1, shap_df_m1, feature_importance_m1 = analyze_model_dataset_m1(
    model=model_Catboost,
    X=datax_miss,
    save_path=save_path,
)


✅ SHAP 文件已保存至: M1_compare/SHAP


------

## M2（未治愈模型）

In [16]:
#预测值
# -*- coding: utf-8 -*-
import copy
import re

import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn import metrics
from sklearn.model_selection import train_test_split
#import shap
import pickle
import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from matplotlib.font_manager import FontProperties
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
# import xgboost
import os
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import precision_recall_fscore_support
from sklearn.neural_network import MLPClassifier
import catboost
from catboost import *
import warnings
warnings.filterwarnings("ignore")

# WenQuanYi Zen Hei font file (presumably for Chinese plot labels; it is not
# referenced anywhere else in this cell).
font = FontProperties(fname='/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc')

# Excel workbook holding the raw patient records.
raw_data_path = 'rawdata_中毒数据_patient_last_1114.xlsx'

# Root directory for every artefact produced by the M2 cell.
save_path = 'M2_compare/'
os.makedirs(save_path, exist_ok=True)

# Module-level log handle used by model_10cv_Catboost() through the global
# name `log`. Mode "a+" creates the file when it is missing, so no separate
# "touch" step is needed. The handle is closed at the end of the cell.
log = open(os.path.join(save_path, 'models_compare_results.txt'), mode="a+", encoding="utf-8")

## Missing-value imputation helper
def data_fillna(X, x_categorical, x_continuous):
    """Impute missing values in the listed columns and return the frame.

    The frame is modified in place; the same object is returned so the call
    can be used in an assignment.

    Args:
        X: pandas DataFrame containing every column named below.
        x_categorical: Names of categorical columns. NaN is replaced by 0 and
            the column is cast to ``int64``.
        x_continuous: Names of continuous columns. NaN is replaced by the
            column median (``Series.median`` skips NaN).

    Returns:
        ``X`` with the listed columns overwritten.
    """
    for col in x_categorical:
        X[col] = X[col].fillna(0).astype('int64')
    for col in x_continuous:
        # Assign the filled column back instead of calling
        # ``X[col].fillna(..., inplace=True)``: the in-place form operates on
        # a temporary Series and is a silent no-op under pandas Copy-on-Write.
        X[col] = X[col].fillna(X[col].median())
    return X
## Dataset preparation helper
def prepare_data(raw_data_path, x_continuous, x_categorical, y_column, isnan_flag=True, type_flag=None):
    """Load the workbook, shuffle the rows and split into features and target.

    Args:
        raw_data_path: Path of the Excel file, read with ``pd.read_excel``.
        x_continuous: Names of the continuous feature columns.
        x_categorical: Names of the categorical feature columns.
        y_column: Name of the target column.
        isnan_flag: When True, NaN is *kept* in the continuous columns and
            only the categorical columns are filled with 0 and cast to
            ``int64``. When False, ``data_fillna`` is applied, which
            additionally fills the continuous columns with their median.
        type_flag: Optional value of the ``'Type of Poisoning'`` column used
            to restrict the rows. Any falsy value (the default ``None``)
            disables the filter.

    Returns:
        Tuple ``(datax, datay)``: an independent feature DataFrame whose
        columns are ordered continuous first, then categorical, and the
        target Series.
    """
    rawdata = pd.read_excel(raw_data_path)
    # Deterministic shuffle of all rows (fixed random_state).
    rawdata = rawdata.sample(frac=1, random_state=42)
    if type_flag:
        # Copy so the column assignments below write to an independent frame
        # rather than to a filtered slice.
        rawdata = rawdata[rawdata['Type of Poisoning'] == type_flag].copy()
    if isnan_flag:
        for col in x_categorical:
            rawdata[col] = rawdata[col].fillna(0).astype('int64')
    else:
        # NOTE(review): medians are computed over all rows before any
        # train/test split happens downstream.
        rawdata = data_fillna(rawdata, x_categorical, x_continuous)
    # Return a copy: callers assign into the feature frame afterwards, which
    # would otherwise be a chained assignment on a slice of ``rawdata``.
    datax = rawdata[list(x_continuous) + list(x_categorical)].copy()
    datay = rawdata[y_column]
    return datax, datay

def model_10cv_Catboost(X,y,save_path,seed):
    """Cross-validate a CatBoost classifier and persist the per-fold results.

    Despite the ``10cv`` in the name, the split is a stratified **5**-fold
    ``StratifiedKFold`` without shuffling.

    Side effects:
        * every fold's fitted model is dumped with joblib to
          ``<save_path>/models/Catboost_model_fold_<k>.pkl``;
        * a summary is printed to stdout and to the module-level ``log``
          file handle, which must be open;
        * the pooled out-of-fold predictions are written to
          ``<save_path>/Catboost/predict_result_Catboost.csv``.

    Args:
        X: Feature DataFrame, indexed positionally with ``iloc``.
        y: Target Series aligned with ``X``.
        save_path: Output root directory.
        seed: Value passed to CatBoost as ``random_seed``.

    Returns:
        Tuple ``(model, predict_result)``. ``model`` is the single estimator
        object that is re-fitted in every fold, so it ends up trained on the
        training split of the **last** fold only. ``predict_result`` is a
        DataFrame with columns ``y_test`` and ``y_tepred`` pooled over folds.
    """
    skf = StratifiedKFold(n_splits=5)
    # NOTE(review): the calling cell passes a seed it labels "optimal"; if
    # that seed was picked by comparing these same CV AUROCs, the reported
    # score is optimistically biased - confirm.
    model = CatBoostClassifier(iterations=500, learning_rate=0.01, random_seed=seed)

    # Build output locations with os.path.join so they are correct whether
    # or not save_path ends with a separator (and stay relative when it is
    # empty), and create them once instead of on every fold.
    models_dir = os.path.join(save_path, 'models')
    predictions_dir = os.path.join(save_path, 'Catboost')
    os.makedirs(models_dir, exist_ok=True)
    os.makedirs(predictions_dir, exist_ok=True)

    t = y.values
    allAUC = []
    allY = []
    allPY = []
    for fold, (train_index, test_index) in enumerate(skf.split(np.zeros(len(t)), t), start=1):
        X_train, X_test = X.iloc[train_index, :], X.iloc[test_index, :]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        model.fit(X_train, y_train)
        # Second column of predict_proba is used as the positive-class score.
        y_tepred = model.predict_proba(X_test)[:, 1]
        auc_te = metrics.roc_auc_score(y_test, y_tepred)
        print('auc_te = ', auc_te)
        allAUC.append(auc_te)
        allY.extend(y_test)
        allPY.extend(y_tepred)
        # Persist the model fitted for this fold.
        joblib.dump(model, os.path.join(models_dir, f'Catboost_model_fold_{fold}.pkl'))

    meanAUC = np.mean(allAUC)
    # The interval labelled "95% CI" is the 2.5th/97.5th percentile of the
    # five per-fold AUCs, not a bootstrap or analytic confidence interval.
    # The label is kept unchanged so existing log files stay comparable.
    summary_lines = [
        '====Catboost model: meanAUC={}'.format(meanAUC),
        '====Catboost model: 95% CI=[{}, {}]'.format(np.percentile(allAUC, 2.5), np.percentile(allAUC, 97.5)),
    ]
    for line in summary_lines:
        print(line)
    for line in summary_lines:
        print(line, file=log)
    print('====Catboost model: AUCS={}'.format(allAUC), file=log)

    predict_result = pd.DataFrame({'y_test':allY,'y_tepred':allPY})
    predict_result.to_csv(os.path.join(predictions_dir, 'predict_result_Catboost.csv'), index=False)
    return model,predict_result

############################################### Current data-source configuration ##############################################
x_features_categorical_raw = ['Type of Poisoning','Degree of poisoning','Vomiting']
# NOTE(review): 'Length of Stay' and 'Albumin (Last Measurement)' are
# presumably only available late in the admission - confirm they are
# legitimate predictors for this outcome.
x_features_continuous_raw = ['Age','Length of Stay','Weight','Diastolic Blood Pressure','White Blood Cell Count','Mean Corpuscular Volume','Alanine Aminotransferase (ALT)','Total Bilirubin','Lactate Dehydrogenase (LDH)','Urea','Uric Acid','Creatine Kinase (CK)','Creatine Kinase-MB Isoenzyme','High-Sensitivity C-Reactive Protein (hs-CRP)','Blood Cholinesterase Test Results','Albumin (First Measurement)','Albumin (Last Measurement)']

# Close the shared log handle even when loading or training raises.
try:
    # Fifth positional argument is isnan_flag=False: missing values are imputed.
    datax_miss, datay = prepare_data(raw_data_path , x_features_continuous_raw, x_features_categorical_raw, 'Outcome', False)

    # Make sure the categorical columns are integer typed.
    datax_miss[x_features_categorical_raw] = datax_miss[x_features_categorical_raw].astype('int64')

    seed = 679  # seed labelled as the best one by the author
    model_Catboost,predict_result_Catboost = model_10cv_Catboost(datax_miss,datay,save_path,seed)
finally:
    log.close()

0:	learn: 0.6887081	total: 1.52ms	remaining: 759ms
1:	learn: 0.6834519	total: 4.64ms	remaining: 1.16s
2:	learn: 0.6774121	total: 6.88ms	remaining: 1.14s
3:	learn: 0.6723235	total: 15.1ms	remaining: 1.88s
4:	learn: 0.6660194	total: 16.9ms	remaining: 1.67s
5:	learn: 0.6598286	total: 19.1ms	remaining: 1.57s
6:	learn: 0.6543068	total: 20.8ms	remaining: 1.46s
7:	learn: 0.6493594	total: 22.5ms	remaining: 1.38s
8:	learn: 0.6456052	total: 23.3ms	remaining: 1.27s
9:	learn: 0.6405086	total: 25ms	remaining: 1.22s
10:	learn: 0.6354776	total: 26.8ms	remaining: 1.19s
11:	learn: 0.6299967	total: 28.6ms	remaining: 1.16s
12:	learn: 0.6257034	total: 30.3ms	remaining: 1.14s
13:	learn: 0.6204994	total: 32ms	remaining: 1.11s
14:	learn: 0.6158967	total: 33.7ms	remaining: 1.09s
15:	learn: 0.6104843	total: 35.4ms	remaining: 1.07s
16:	learn: 0.6061524	total: 37ms	remaining: 1.05s
17:	learn: 0.6017407	total: 38.7ms	remaining: 1.04s
18:	learn: 0.5970802	total: 40.3ms	remaining: 1.02s
19:	learn: 0.5929344	total: 

### SHAP分析

In [18]:
import os
import numpy as np
import pandas as pd
import shap
import warnings
warnings.filterwarnings("ignore")
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.cm import get_cmap


def analyze_model_dataset_m1(model, X, save_path, title="(M2) SHAP Feature Importance"):
    """Compute SHAP values for ``model`` on ``X`` and save tables and a plot.

    This copy is used for the M2 model in this cell; the ``_m1`` suffix in
    the name is kept only so the existing call keeps working.

    Files written to ``<save_path>/SHAP``:
        * ``shap_values.csv``        - SHAP value per sample and feature
        * ``shap_data.csv``          - the ``data`` attribute of the SHAP result
        * ``feature_importance.csv`` - mean absolute SHAP value per feature,
          sorted in descending order
        * ``shap_summary_plot.png``  - summary plot

    Args:
        model: Fitted model accepted by ``shap.Explainer``.
        X: Feature DataFrame to explain; its column names label the outputs.
        save_path: Output root directory.
        title: Title drawn on the summary plot.

    Returns:
        Tuple ``(explainer, shap_values, shap_df, feature_importance)``.
    """
    output_dir = os.path.join(save_path, 'SHAP')
    os.makedirs(output_dir, exist_ok=True)

    # 1) Compute SHAP values. Calling the explainer returns an Explanation
    #    object (never a plain list).
    explainer = shap.Explainer(model)
    shap_values = explainer(X)

    # If the explainer produced one output per class (3-D values array),
    # keep the slice for class index 1.
    if np.ndim(shap_values.values) == 3:
        shap_values = shap_values[:, :, 1]

    # 2) Save the SHAP value matrix.
    shap_df = pd.DataFrame(shap_values.values, columns=X.columns)
    shap_df.to_csv(os.path.join(output_dir, 'shap_values.csv'), index=False)

    # 3) Save the input data that corresponds to each SHAP row.
    shap_data_df = pd.DataFrame(shap_values.data, columns=X.columns)
    shap_data_df.to_csv(os.path.join(output_dir, 'shap_data.csv'), index=False)

    # 4) Feature importance = mean absolute SHAP value over samples.
    feature_importance = pd.DataFrame({
        'Feature': X.columns,
        'Importance': np.abs(shap_values.values).mean(axis=0)
    }).sort_values('Importance', ascending=False)
    feature_importance.to_csv(os.path.join(output_dir, 'feature_importance.csv'), index=False)

    # 5) Summary plot. plt.get_cmap is used because matplotlib.cm.get_cmap
    #    is deprecated and removed in newer matplotlib releases.
    cmap = plt.get_cmap('cool')
    plt.figure(figsize=(6, 8))
    try:
        shap.summary_plot(shap_values, X, cmap=cmap, show=False)
        plt.title(title, fontsize=15)
        plt.xlabel("SHAP Value (Impact on model output)", fontsize=12)
        plt.tight_layout(pad=2.0)
        plt.savefig(os.path.join(output_dir, 'shap_summary_plot.png'), dpi=300, bbox_inches='tight')
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close()

    print("✅ SHAP 文件已保存至:", output_dir)
    return explainer, shap_values, shap_df, feature_importance


# ======================================
# Example usage
# ======================================
# Output root of the M2 model.
save_path = 'M2_compare/'
# The result names keep their "_m1" suffix although they hold M2 results here.
# model_Catboost is the object returned by model_10cv_Catboost(), i.e. the
# estimator as fitted in the last cross-validation fold.
explainer_m1, shap_values_m1, shap_df_m1, feature_importance_m1 = analyze_model_dataset_m1(
    model=model_Catboost,
    X=datax_miss,
    save_path=save_path,
)


✅ SHAP 文件已保存至: M2_compare/SHAP


In [1]:
import pandas as pd
import numpy as np

# Load the raw workbook.
raw_data_path = 'rawdata_中毒数据_patient_last_1114.xlsx'
df = pd.read_excel(raw_data_path)


# Print min / max / mean / median / standard deviation for every numeric column.
numeric_columns = df.select_dtypes(include=[np.number]).columns
print("\n数值列的详细统计:")
for col in numeric_columns:
    values = df[col]
    # Assemble the whole report for this column, then emit it in one call;
    # the printed text is line-for-line the same as separate print() calls.
    report = (
        f"\n{col}:",
        f"  最小值: {values.min()}",
        f"  最大值: {values.max()}",
        f"  平均值: {values.mean():.2f}",
        f"  中位数: {values.median()}",
        f"  标准差: {values.std():.2f}",
    )
    print("\n".join(report))



数值列的详细统计:

Hospital ID:
  最小值: 5000152
  最大值: 5536020
  平均值: 5253341.48
  中位数: 5252331.0
  标准差: 149266.76

Gender:
  最小值: 0
  最大值: 1
  平均值: 0.47
  中位数: 0.0
  标准差: 0.50

Age:
  最小值: 6
  最大值: 92
  平均值: 43.90
  中位数: 45.0
  标准差: 21.31

Length of Stay:
  最小值: 1.0
  最大值: 28.0
  平均值: 5.42
  中位数: 4.0
  标准差: 4.71

Education Level:
  最小值: 1.0
  最大值: 5.0
  平均值: 2.89
  中位数: 3.0
  标准差: 1.00

Residential address:
  最小值: 0
  最大值: 2
  平均值: 0.73
  中位数: 1.0
  标准差: 0.70

Weight:
  最小值: 22.0
  最大值: 100.0
  平均值: 62.78
  中位数: 60.0
  标准差: 12.83

Systolic Blood Pressure:
  最小值: 53.0
  最大值: 193.0
  平均值: 123.29
  中位数: 122.0
  标准差: 21.95

Diastolic Blood Pressure:
  最小值: 21.0
  最大值: 171.0
  平均值: 75.83
  中位数: 76.0
  标准差: 16.12

Heart Rate:
  最小值: 39
  最大值: 173
  平均值: 89.68
  中位数: 86.0
  标准差: 19.17

Respiratory Rate:
  最小值: 7.0
  最大值: 60.0
  平均值: 20.14
  中位数: 20.0
  标准差: 2.22

Type of Poisoning:
  最小值: 1
  最大值: 4
  平均值: 2.38
  中位数: 3.0
  标准差: 0.86

White Blood Cell Count:
  最小值: 2.0
  最大值: 46.8
  平均值: 9.96
  中位数: