In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
import matplotlib.pyplot as plt
import time

# 自定义 MAPE 和 sMAPE 计算函数
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), expressed in percent.

    Both inputs are converted to float NumPy arrays before any arithmetic, so
    the comparison is strictly positional. This makes plain lists usable and
    prevents pandas from aligning two Series on their index labels, which
    would otherwise pair up the wrong samples or yield NaN.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted values, in the same order as ``y_true``.

    Returns
    -------
    float
        ``mean(|y_true - y_pred| / |y_true|) * 100``. Any zero in ``y_true``
        leads to a division by zero, so the result is ``inf`` or ``nan`` in
        that case.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def symmetric_mean_absolute_percentage_error(y_true, y_pred):
    """Return the symmetric mean absolute percentage error (sMAPE), in percent.

    Inputs are converted to float NumPy arrays so the computation is positional
    (plain lists work, and pandas index alignment cannot mis-pair samples).

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted values, in the same order as ``y_true``.

    Returns
    -------
    float
        ``100 * mean(2 * |y_pred - y_true| / (|y_pred| + |y_true|))``.
        A sample where both the true and the predicted value are zero
        contributes 0 (a perfect prediction) instead of turning the whole
        metric into NaN through a 0/0 division.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    numerator = 2 * np.abs(y_pred - y_true)
    denominator = np.abs(y_pred) + np.abs(y_true)
    # The denominator is zero only when both values are zero, i.e. the error is zero too.
    ratio = np.divide(
        numerator,
        denominator,
        out=np.zeros_like(numerator),
        where=denominator != 0,
    )
    return 100 * np.mean(ratio)

# 加载数据
def load_data(file_path, target_column):
    """Read an Excel sheet and split it into a feature frame and a target series.

    Parameters
    ----------
    file_path : path-like
        Location of the Excel workbook, handed to ``pd.read_excel``.
    target_column : str
        Name of the column holding the regression target.

    Returns
    -------
    tuple
        ``(X, y)``: ``X`` is every column except the target after
        ``pd.get_dummies`` has one-hot encoded the categorical columns;
        ``y`` is the target column.
    """
    frame = pd.read_excel(file_path)
    # One-hot encode categorical feature columns; the target is removed first.
    features = pd.get_dummies(frame.drop(columns=[target_column]))
    target = frame[target_column]
    return features, target

# 划分数据集
def split_data(X, y):
    """Split features and target into train, validation and test partitions.

    The test set is carved out first (20% of all rows); the validation set is
    then 25% of the remainder, i.e. 20% of all rows, leaving 60% for training.
    Both splits use ``random_state=42``.

    Returns
    -------
    tuple
        ``(X_train, X_val, X_test, y_train, y_val, y_test)``.
    """
    seed = 42
    X_rest, X_test, y_rest, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed
    )
    X_train, X_val, y_train, y_val = train_test_split(
        X_rest, y_rest, test_size=0.25, random_state=seed
    )
    return X_train, X_val, X_test, y_train, y_val, y_test


# 模型评估函数，返回评估指标
def evaluate_model(model, X_train, y_train, X_val, y_val, X_test, y_test, model_name, param_grid=None):
    """Fit a regressor, time its test-set inference and report error metrics.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit`` and ``predict``.
    X_train, y_train
        Training data used for fitting (and for the 5-fold grid search when
        ``param_grid`` is given).
    X_val, y_val
        Accepted for interface compatibility; this function does not use them.
    X_test, y_test
        Held-out data on which predictions, metrics and timing are computed.
    model_name : str
        Label used in the printed report.
    param_grid : dict or list of dict, optional
        When truthy, a ``GridSearchCV`` (cv=5, scoring ``neg_mean_squared_error``)
        is run on the training data and its best estimator is evaluated.
        Otherwise ``model`` itself is fitted as configured.

    Returns
    -------
    tuple
        ``(test_preds, mse, mape, smape, inference_time, throughput)`` where
        ``inference_time`` is in seconds and ``throughput`` in samples/second.
    """
    if param_grid:
        # Hyperparameter tuning on the training split only.
        grid_search = GridSearchCV(
            estimator=model,
            param_grid=param_grid,
            cv=5,
            scoring='neg_mean_squared_error',
        )
        grid_search.fit(X_train, y_train)
        best_model = grid_search.best_estimator_
        print(f"Best parameters for {model_name}: {grid_search.best_params_}")
    else:
        best_model = model
        best_model.fit(X_train, y_train)

    # perf_counter is a monotonic high-resolution clock; time.time() can report a
    # zero-length interval for very fast predictions on coarse system clocks.
    start_time = time.perf_counter()
    test_preds = best_model.predict(X_test)
    inference_time = time.perf_counter() - start_time

    # Guard against a zero interval so the throughput never raises ZeroDivisionError.
    throughput = len(X_test) / inference_time if inference_time > 0 else float('inf')

    # Error metrics on the test split.
    mse = mean_squared_error(y_test, test_preds)
    mape = mean_absolute_percentage_error(y_test, test_preds)
    smape = symmetric_mean_absolute_percentage_error(y_test, test_preds)

    # Report.
    print(f"{model_name} Performance on Test Set:")
    print(f"Mean Squared Error (MSE): {mse}")
    print(f"Mean Absolute Percentage Error (MAPE): {mape}%")
    print(f"Symmetric Mean Absolute Percentage Error (sMAPE): {smape}%")
    print(f"Inference Time: {inference_time} seconds")
    print(f"Throughput: {throughput} samples/second")

    return test_preds, mse, mape, smape, inference_time, throughput






   


In [2]:
# Dataset location and the name of the regression target column.
data_path = 'D:/疏散数据汇总/疏散时间.xlsx'  # adjust to your own path
target_column = 'Total evacuation time for 95% of evacuees'

# Load the features/target, then derive the train / validation / test partitions.
X, y = load_data(data_path, target_column)
X_train, X_val, X_test, y_train, y_val, y_test = split_data(X, y)

随机森林

In [3]:
# Random forest with fixed hyperparameters (no grid is passed, so no search is run).
rf_model = RandomForestRegressor(
    random_state=42,
    n_estimators=100,
    max_depth=10,
    min_samples_leaf=1,
    min_samples_split=2,
)

# Fit on the training split and score on the test split.
# NOTE: despite its name, rf_preds holds the full result tuple
# (predictions, MSE, MAPE, sMAPE, inference time, throughput).
rf_preds = evaluate_model(
    rf_model,
    X_train, y_train,
    X_val, y_val,
    X_test, y_test,
    'RandomForest',
)


RandomForest Performance on Test Set:
Mean Squared Error (MSE): 7232.968442124467
Mean Absolute Percentage Error (MAPE): 11.553346580949626%
Symmetric Mean Absolute Percentage Error (sMAPE): 11.278411611890347%
Inference Time: 0.00899052619934082 seconds
Throughput: 48050.58017979793 samples/second


In [4]:
# Show only the metrics: the slice drops element 0 (the predictions array)
# and the last element (throughput) of the tuple returned by evaluate_model.
rf_preds[1:-1]

(7232.968442124467,
 11.553346580949626,
 11.278411611890347,
 0.00899052619934082)

xgboost

In [5]:
# XGBoost regressor with fixed hyperparameters.
xgb_model = XGBRegressor(
    random_state=42,
    learning_rate=0.05,
    max_depth=3,
    n_estimators=200,
)

# Fit on the training split and score on the test split.
xgb_preds = evaluate_model(
    xgb_model,
    X_train, y_train,
    X_val, y_val,
    X_test, y_test,
    'XGBoost',
)


XGBoost Performance on Test Set:
Mean Squared Error (MSE): 6174.6646275039875
Mean Absolute Percentage Error (MAPE): 9.917478903269085%
Symmetric Mean Absolute Percentage Error (sMAPE): 9.785413484736171%
Inference Time: 0.003999948501586914 seconds
Throughput: 108001.39047505513 samples/second


LightGBM

In [6]:
# LightGBM regressor with fixed hyperparameters.
lgbm_model = LGBMRegressor(
    random_state=42,
    learning_rate=0.05,
    n_estimators=200,
    num_leaves=20,
)

# Candidate search space. NOTE: it is not passed to evaluate_model below,
# so no grid search is performed in this cell.
lgbm_param_grid = [
    {
        'n_estimators': [100, 200],
        'num_leaves': [20, 31],
        'learning_rate': [0.05, 0.1],
    }
]

# Fit on the training split and score on the test split.
lgbm_preds = evaluate_model(
    lgbm_model,
    X_train, y_train,
    X_val, y_val,
    X_test, y_test,
    'LightGBM',
)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000225 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 30
[LightGBM] [Info] Number of data points in the train set: 1296, number of used features: 6
[LightGBM] [Info] Start training from score 455.633022
LightGBM Performance on Test Set:
Mean Squared Error (MSE): 6332.062295648927
Mean Absolute Percentage Error (MAPE): 9.817051977704788%
Symmetric Mean Absolute Percentage Error (sMAPE): 9.660965377374433%
Inference Time: 0.0010390281677246094 seconds
Throughput: 415773.13630105555 samples/second


GBDT

In [9]:
# Gradient-boosted decision trees with fixed hyperparameters.
gbdt_model = GradientBoostingRegressor(
    random_state=42,
    learning_rate=0.1,
    max_depth=3,
    n_estimators=100,
)

# Fit on the training split and score on the test split.
gbdt_preds = evaluate_model(
    gbdt_model,
    X_train, y_train,
    X_val, y_val,
    X_test, y_test,
    'GBDT',
)


GBDT Performance on Test Set:
Mean Squared Error (MSE): 6032.521844158449
Mean Absolute Percentage Error (MAPE): 9.838344764250579%
Symmetric Mean Absolute Percentage Error (sMAPE): 9.694712596360132%
Inference Time: 0.0034682750701904297 seconds
Throughput: 124557.59455557847 samples/second


AdaBoost

In [10]:
# AdaBoost regressor with fixed hyperparameters.
ada_model = AdaBoostRegressor(
    random_state=42,
    learning_rate=0.1,
    n_estimators=200,
)

# Fit on the training split and score on the test split.
ada_preds = evaluate_model(
    ada_model,
    X_train, y_train,
    X_val, y_val,
    X_test, y_test,
    'AdaBoost',
)


AdaBoost Performance on Test Set:
Mean Squared Error (MSE): 11472.034809348976
Mean Absolute Percentage Error (MAPE): 19.477741470000613%
Symmetric Mean Absolute Percentage Error (sMAPE): 17.34554073051887%
Inference Time: 0.016278505325317383 seconds
Throughput: 26538.063008040775 samples/second


SVM

In [11]:
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV

# Support vector regressor with fixed C and epsilon (no grid is passed to evaluate_model).
svm_model = SVR(C=10, epsilon=0.1)

# Fit on the training split and score on the test split.
svm_preds = evaluate_model(
    svm_model,
    X_train, y_train,
    X_val, y_val,
    X_test, y_test,
    'SVM',
)


SVM Performance on Test Set:
Mean Squared Error (MSE): 37538.448841481884
Mean Absolute Percentage Error (MAPE): 27.921595314701836%
Symmetric Mean Absolute Percentage Error (sMAPE): 28.045033126567027%
Inference Time: 0.02962040901184082 seconds
Throughput: 14584.538647906824 samples/second


神经网络

In [12]:
# Multi-layer perceptron regressor with fixed hyperparameters.
nn_model = MLPRegressor(
    random_state=42,
    hidden_layer_sizes=100,
    learning_rate_init=0.01,
)

# Candidate search space. NOTE: it is not passed to evaluate_model below,
# so no grid search is performed in this cell.
nn_param_grid = [
    {
        'hidden_layer_sizes': [(50, 50), (100,)],
        'activation': ['relu', 'tanh'],
        'learning_rate_init': [0.001, 0.01],
    }
]

# Fit on the training split and score on the test split.
nn_preds = evaluate_model(
    nn_model,
    X_train, y_train,
    X_val, y_val,
    X_test, y_test,
    'NeuralNetwork',
)


NeuralNetwork Performance on Test Set:
Mean Squared Error (MSE): 10316.19416151316
Mean Absolute Percentage Error (MAPE): 13.249480432371008%
Symmetric Mean Absolute Percentage Error (sMAPE): 13.61400441339137%
Inference Time: 0.001003265380859375 seconds
Throughput: 430593.94676806085 samples/second


决策树

In [13]:
# Single decision tree with fixed hyperparameters.
dt_model = DecisionTreeRegressor(
    random_state=42,
    max_depth=10,
    min_samples_leaf=4,
    min_samples_split=2,
)

# Candidate search space. NOTE: it is not passed to evaluate_model below,
# so no grid search is performed in this cell.
dt_param_grid = [
    {
        'max_depth': [10, 20],
        'min_samples_split': [2, 5],
        'min_samples_leaf': [1, 4],
    }
]

# Fit on the training split and score on the test split.
dt_preds = evaluate_model(
    dt_model,
    X_train, y_train,
    X_val, y_val,
    X_test, y_test,
    'DecisionTree',
)


DecisionTree Performance on Test Set:
Mean Squared Error (MSE): 8655.627742107341
Mean Absolute Percentage Error (MAPE): 13.48259099073405%
Symmetric Mean Absolute Percentage Error (sMAPE): 13.129731089009312%
Inference Time: 0.0010058879852294922 seconds
Throughput: 429471.27945010667 samples/second


线性回归

In [14]:
# Ordinary least-squares baseline with default settings.
lr_model = LinearRegression()

# Fit on the training split and score on the test split.
lr_preds = evaluate_model(
    lr_model,
    X_train, y_train,
    X_val, y_val,
    X_test, y_test,
    'LinearRegression',
)


LinearRegression Performance on Test Set:
Mean Squared Error (MSE): 17120.459992005963
Mean Absolute Percentage Error (MAPE): 19.77535267092906%
Symmetric Mean Absolute Percentage Error (sMAPE): 18.945868177914118%
Inference Time: 0.000997304916381836 seconds
Throughput: 433167.42242409755 samples/second


In [15]:
from lightgbm import LGBMRegressor
from sklearn.base import BaseEstimator, RegressorMixin
import numpy as np

# Attention风格的LightGBM包装器（用于学习特征重要性）
class AttentionLightGBM(BaseEstimator, RegressorMixin):
    """LightGBM regressor wrapper that also exposes normalized feature importances.

    All keyword arguments are forwarded unchanged to ``LGBMRegressor``. After
    ``fit`` the wrapper stores the booster's ``feature_importances_`` and a
    copy rescaled to sum to 1 in ``attention_weights_``. Predictions come
    straight from the wrapped model; the weights do not alter them.

    Because ``__init__`` only takes ``**kwargs``, the default
    ``BaseEstimator.get_params`` would report no parameters, and
    ``sklearn.base.clone`` (used by ``GridSearchCV``) would rebuild the
    estimator with LightGBM defaults. ``get_params`` / ``set_params`` are
    therefore overridden to round-trip the constructor arguments.
    """

    def __init__(self, **kwargs):
        # Remember the constructor arguments so the estimator can be cloned faithfully.
        self._lgbm_params = dict(kwargs)
        self.model = LGBMRegressor(**kwargs)
        self.feature_importances_ = None

    def get_params(self, deep=True):
        """Return the keyword arguments this wrapper was configured with."""
        return dict(self._lgbm_params)

    def set_params(self, **params):
        """Update the stored arguments and forward them to the wrapped model."""
        self._lgbm_params.update(params)
        self.model.set_params(**params)
        return self

    def fit(self, X, y):
        """Fit the wrapped LightGBM model and derive the normalized importances."""
        self.model.fit(X, y)
        self.feature_importances_ = self.model.feature_importances_
        importance_total = np.sum(self.feature_importances_)
        if importance_total > 0:
            attention_weights = self.feature_importances_ / importance_total
        else:
            # Every importance is zero: avoid 0/0 and report all-zero weights.
            attention_weights = np.zeros_like(self.feature_importances_, dtype=float)
        self.attention_weights_ = attention_weights
        return self

    def predict(self, X):
        """Return the wrapped model's predictions for ``X``."""
        return self.model.predict(X)

# Instantiate the Attention-LightGBM wrapper; the keyword arguments go to LGBMRegressor.
attention_lgbm_model = AttentionLightGBM(n_estimators=100, learning_rate=0.1, max_depth=7, random_state=42)

# Reuse the shared evaluation routine: fit on the training split, score on the test split.
attention_lgbm_preds = evaluate_model(
    attention_lgbm_model, X_train, y_train, X_val, y_val, X_test, y_test, 'Attention_LightGBM'
)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000020 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 30
[LightGBM] [Info] Number of data points in the train set: 1296, number of used features: 6
[LightGBM] [Info] Start training from score 455.633022
Attention_LightGBM Performance on Test Set:
Mean Squared Error (MSE): 6305.598575875728
Mean Absolute Percentage Error (MAPE): 9.752583511987629%
Symmetric Mean Absolute Percentage Error (sMAPE): 9.640048801044303%
Inference Time: 0.0020017623901367188 seconds
Throughput: 215809.8294425917 samples/second


In [16]:
# 1. Assemble the per-model metric matrix (the expert AHP matrices are loaded further below).
# The [1:-1] slice keeps (MSE, MAPE, sMAPE, inference time) from each result tuple
# and drops the predictions (first element) and the throughput (last element).
metrics_results = {
    label: result[1:-1]
    for label, result in [
        ('RandomForest', rf_preds),
        ('XGBoost', xgb_preds),
        ('LightGBM', lgbm_preds),
        ('GBDT', gbdt_preds),
        ('AdaBoost', ada_preds),
        ('SVM', svm_preds),
        ('NeuralNetwork', nn_preds),
        ('DecisionTree', dt_preds),
        ('LinearRegression', lr_preds),
        ('Attention_LightGBM', attention_lgbm_preds),
    ]
}

# One row per model, one column per metric.
df = pd.DataFrame(metrics_results).T
print(df)
#df.to_excel(r'D:\疏散数据汇总\matrix_output.xlsx')
gamma_matrix = df.values

# Columns where a smaller value is better: MSE, MAPE, sMAPE, inference time.
negative_indices = [0, 1, 2, 3]

# Invert those columns so that a larger value always means a better model.
adjusted_matrix = gamma_matrix.copy()
adjusted_matrix[:, negative_indices] = 1 / adjusted_matrix[:, negative_indices]

# eta: column total divided by each entry.
eta = adjusted_matrix.sum(axis=0) / adjusted_matrix
print("Adjusted Metrics (η):")
print(eta)

# Comparison matrix xi: each entry as a share of its column total.
n_models, n_metrics = adjusted_matrix.shape
adjusted_column_totals = [adjusted_matrix[:, j].sum() for j in range(n_metrics)]
comparison_matrix = np.array([
    [adjusted_matrix[i, j] / adjusted_column_totals[j] for j in range(n_metrics)]
    for i in range(n_models)
])
print("\nComparison Matrix (ξ):")
print(comparison_matrix)

# Normalized matrix phi: the same column-share normalization applied to xi.
comparison_column_totals = [
    comparison_matrix[:, j].sum() for j in range(comparison_matrix.shape[1])
]
normalized_matrix = np.array([
    [comparison_matrix[i, j] / comparison_column_totals[j] for j in range(comparison_matrix.shape[1])]
    for i in range(comparison_matrix.shape[0])
])
print("\nNormalized Matrix (φ):")
print(normalized_matrix)
#dt = pd.DataFrame(normalized_matrix)
#dt.to_excel(r'D:\疏散数据汇总\指标权重结果\对照组\matrix_output1.xlsx')

def read_expert_matrices(file_paths):
    """Load one matrix per Excel file and return them as a list of NumPy arrays.

    Each workbook is read with its first column as the index, so only the
    remaining cells end up in the returned array. The list follows the order
    of ``file_paths``.
    """
    return [pd.read_excel(path, index_col=0).values for path in file_paths]

# Paths of the five expert pairwise-comparison workbooks (expert1.xlsx ... expert5.xlsx).
# Raw strings keep the Windows backslashes literal; in a normal string, sequences such
# as "\e" or "\疏" are invalid escapes that Python warns about and plans to reject.
file_paths = [
    rf'D:\疏散数据汇总\指标权重结果\场景1：精准预测场景\expert{expert_no}.xlsx'
    for expert_no in range(1, 6)
]

# Read the pairwise comparison matrices.
expert_matrices = read_expert_matrices(file_paths)


# 几何平均法计算权重
def calculate_weights(matrix):
    """Derive a weight vector from a pairwise comparison matrix.

    Each column is first scaled to sum to 1. The weight of a row is then the
    geometric mean of its scaled entries, and the resulting vector is rescaled
    so the weights sum to 1.

    Parameters
    ----------
    matrix : numpy.ndarray
        2-D numeric comparison matrix.

    Returns
    -------
    numpy.ndarray
        One weight per row of ``matrix``.
    """
    scaled = matrix / matrix.sum(axis=0)
    n_columns = scaled.shape[1]
    geo_means = np.prod(scaled, axis=1) ** (1 / n_columns)
    return geo_means / geo_means.sum()

# 计算所有专家的成对比较矩阵的几何平均
def parse_value(value):
    """Convert a matrix cell to ``float``, accepting ``"a/b"`` fraction strings.

    The cell is first passed to ``float``. If that raises ``ValueError``, the
    cell is split on ``/`` into exactly two parts, which are divided.

    Raises
    ------
    ValueError
        If the cell is neither a plain number nor a two-part fraction.
    """
    try:
        return float(value)
    except ValueError:
        pass  # not a plain number; try the "numerator/denominator" form next

    try:
        top, bottom = value.split('/')
        return float(top) / float(bottom)
    except ValueError:
        raise ValueError(f"无法解析数值: {value}")

# 定义矩阵转换函数
def convert_matrix(matrix):
    """Return a nested list in which every cell of ``matrix`` went through ``parse_value``."""
    converted_rows = []
    for row in matrix:
        converted_rows.append([parse_value(cell) for cell in row])
    return converted_rows

# 定义几何平均函数
def geometric_mean_of_matrices(matrices):
    """Return the element-wise geometric mean of several equally shaped matrices.

    Every matrix is first converted to a float array via ``convert_matrix``
    (so fraction strings such as ``"1/3"`` are accepted). The arrays are then
    multiplied element-wise and the product is raised to ``1 / len(matrices)``.
    """
    numeric = [np.array(convert_matrix(raw), dtype=float) for raw in matrices]
    accumulated = np.ones_like(numeric[0], dtype=float)
    for current in numeric:
        accumulated *= current
    return accumulated ** (1 / len(numeric))

# Aggregate the experts' pairwise comparison matrices (element-wise geometric mean).
combined_matrix = geometric_mean_of_matrices(expert_matrices)
print("\n综合成对比较矩阵：")
print(combined_matrix)

# Weights of the aggregated comparison matrix, i.e. the final metric weights.
final_weights = calculate_weights(combined_matrix)
print("\n最终的指标权重：")
print(final_weights)

# Normalized metric matrix. The next line is a bare expression with no effect;
# the trailing "= gamma_matrix" assignment was commented out.
normalized_matrix #= gamma_matrix

# Final composite score Q_i of every model: weighted sum of its normalized metrics.
Q = final_weights @ normalized_matrix.T  # note: normalized_matrix must be transposed here

print("\n每个模型的最终综合评分 Q_i:")
print(Q)



                               0          1          2         3
RandomForest         7232.968442  11.553347  11.278412  0.008991
XGBoost              6174.664628   9.917479   9.785413  0.004000
LightGBM             6332.062296   9.817052   9.660965  0.001039
GBDT                 6032.521844   9.838345   9.694713  0.003468
AdaBoost            11472.034809  19.477741  17.345541  0.016279
SVM                 37538.448841  27.921595  28.045033  0.029620
NeuralNetwork       10316.194162  13.249480  13.614004  0.001003
DecisionTree         8655.627742  13.482591  13.129731  0.001006
LinearRegression    17120.459992  19.775353  18.945868  0.000997
Attention_LightGBM   6305.598576   9.752584   9.640049  0.002002
Adjusted Metrics (η):
[[  8.44214706   9.02083347   8.98841511  46.75381177]
 [  7.20692027   7.74355076   7.798559    20.80110054]
 [  7.39063104   7.66513758   7.69937915   5.40330191]
 [  7.04101462   7.68176296   7.72627425  18.03621681]
 [ 13.38988352  15.2081876   13.82365937  8

In [2]:
# Table holding the simulated (actual) targets next to every model's predictions.
# The index is reset so rows are numbered 0..n-1, which is convenient for plotting.
comparison_df = pd.DataFrame({
    'Actual': y_test
}).reset_index(drop=True)

# evaluate_model returns (predictions, mse, mape, smape, inference_time, throughput).
# Only element 0 is a per-sample array; assigning the whole tuple as a column
# would fail with a length mismatch, so the predictions are selected explicitly.
prediction_sources = {
    'RandomForest': rf_preds,
    'XGBoost': xgb_preds,
    'LightGBM': lgbm_preds,
    'GBDT': gbdt_preds,
    'AdaBoost': ada_preds,
    'SVM': svm_preds,
    'NeuralNetwork': nn_preds,
    'DecisionTree': dt_preds,
    'LinearRegression': lr_preds,
}
for model_label, model_result in prediction_sources.items():
    comparison_df[f'{model_label} Predictions'] = model_result[0]

# 可视化对比
def plot_comparison(data, model_names):
    """Plot the actual values against each model's predictions on one figure.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain an ``'Actual'`` column and one column per entry of
        ``model_names``.
    model_names : iterable of str
        Column names of the prediction series to overlay.
    """
    fig, ax = plt.subplots(figsize=(14, 8))
    ax.plot(data['Actual'], label='Actual (Simulation)', color='black', linestyle='-', marker='o')

    for column in model_names:
        ax.plot(data[column], label=column)

    ax.set_title('Comparison of Simulation Data vs Model Predictions')
    ax.set_xlabel('Sample Index')
    ax.set_ylabel('Evacuation Time')
    ax.legend()
    fig.tight_layout()
    plt.show()

# Draw the comparison figure for the nine baseline models.
plotted_models = [
    'RandomForest', 'XGBoost', 'LightGBM', 'GBDT', 'AdaBoost',
    'SVM', 'NeuralNetwork', 'DecisionTree', 'LinearRegression',
]
prediction_columns = [f'{model_name} Predictions' for model_name in plotted_models]
plot_comparison(comparison_df, prediction_columns)

# Save the prediction-vs-simulation table to Excel.
comparison_df.to_excel(r'D:\疏散数据汇总\预测与仿真对比结果_多模型_验证集.xlsx', index=False)
print("预测与仿真数据的对比结果已保存到 Excel 文件")


NameError: name 'y_test' is not defined