In [55]:
#机器学习包
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split 
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import make_scorer, explained_variance_score, median_absolute_error
from sklearn.model_selection import ShuffleSplit
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR 
from sklearn.neighbors import KNeighborsRegressor


import time

#神经网络包
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset,TensorDataset,DataLoader
from torch.utils.data import random_split
import matplotlib.pyplot as plt
import random, os

In [56]:
#保证数据可复现
def random_seed(seed):
    random.seed(seed)

    os.environ['PYTHONHASHSEED'] =str(seed)

    np.random.seed(seed)

    torch.manual_seed(seed)

    torch.cuda.manual_seed(seed)

    torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.deterministic =True

#自定义函数
#定义函数
#标准化
def ss(features, labels):
    from sklearn.preprocessing import StandardScaler
    scaler = StandardScaler()
    X_s = scaler.fit_transform(features)
    X_s = pd.DataFrame(X_s)
    data = pd.concat([X_s, labels], axis=1)
    return data

#评估模型得分
#评估模型得分
# MAPE 计算函数
def mape_scorer(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


def model_score(model, x, y, trainsize, testsize):
    # 创建交叉验证生成器
    cv = ShuffleSplit(n_splits=10, train_size=trainsize, test_size=testsize, random_state=0)

    # 计算 RMSE
    rmse = cross_val_score(model, x, y, scoring="neg_mean_squared_error", cv=cv)
    rmse_score = np.sqrt(-rmse)
    rmse_mean = rmse_score.mean()

    # 计算 MAE
    mae = cross_val_score(model, x, y, scoring="neg_mean_absolute_error", cv=cv)
    mae_score = -mae
    mae_mean = mae_score.mean()

    # 计算 R²
    r2 = cross_val_score(model, x, y, scoring='r2', cv=cv)
    r2_mean = r2.mean()

    # 计算 MAPE
    mape = cross_val_score(model, x, y, scoring=make_scorer(mape_scorer, greater_is_better=False), cv=cv)
    mape_score = -mape
    mape_mean = mape_score.mean()

    # 计算 MedAE
    medae = cross_val_score(model, x, y, scoring=make_scorer(median_absolute_error, greater_is_better=False), cv=cv)
    medae_score = -medae
    medae_mean = medae_score.mean()

    # 将所有评分结果整合
    scores = [
        rmse_score, rmse_mean, mae_score, mae_mean, r2, r2_mean, mape_score, mape_mean, medae_score, medae_mean]
    
    # 创建各指标的 DataFrame
    rmse_df = pd.DataFrame(scores[0], columns=['rmse'], index=np.arange(len(scores[0])))
    mae_df = pd.DataFrame(scores[2], columns=['mae'], index=np.arange(len(scores[2])))
    r2_df = pd.DataFrame(scores[4], columns=['r2'], index=np.arange(len(scores[4])))
    mape_df = pd.DataFrame(scores[6], columns=['mape'], index=np.arange(len(scores[6])))
    medae_df = pd.DataFrame(scores[8], columns=['medae'], index=np.arange(len(scores[8])))

    # 合并所有得分结果
    scores_df = pd.concat([rmse_df, mae_df, r2_df, mape_df, medae_df], axis=1)

    return scores_df


#导出预测值到csv
def ToCsv(model, Xtest, ytest, filename):
    ytest = pd.DataFrame(ytest.values, index=[np.arange(len(ytest))], columns=['yreal1', 'yreal2', 'yreal3'])
    ypredict = model.predict(Xtest)
    ypredict = pd.DataFrame(ypredict, index=[np.arange(len(ytest))], columns=['ypredict1', 'ypredict2', 'ypredict3'])
    # ypredict
    output = pd.concat([ytest, ypredict], axis=1)
    output.to_csv(filename)

#数据导入＋预处理
#数据导入＋预处理
def DataProcess(path):
    data = pd.read_csv(path)
    data_df = pd.DataFrame(data)
    X_df = data_df.iloc[:,1:6]
    y_df = data_df.iloc[:,6:9]

    data_s = ss(X_df,y_df)
    X = data_s.iloc[:,0:5]
    y = data_s.iloc[:,5:]
    return X, y

def DataSplit(X,y, testsize, seed):
    random_seed(seed)
    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=testsize)
    return Xtrain, Xtest, ytrain, ytest

In [57]:
#DNN模型、函数

#神经网络数据处理三步：
#1.加载数据，提取出feature和label，并转换成tensor
#2.传入TensorDataset中，实例化TensorDataset为datsset
#3.再将dataset传入到Dataloader中，最后通过enumerate输出我们想要的经过shuffle的bachsize大小的feature和label数据

#dataframe转换为tensor
def Df2Tensor(df):
    array = np.array(df)
    tensor = torch.tensor(array, dtype=torch.float32)
    return tensor

def ToDataset(*args):
    return TensorDataset(*args)

#dataset变迭代器
def ToDataLoader(dataset, batchsize):
    return DataLoader(dataset, batchsize, shuffle=True)


'''定义网络结构'''
class Net(nn.Module):
    def __init__(self, 
            input_dim, output_dim, 
            hidden_layer1, hidden_layer2, hidden_layer3, hidden_layer4, 
            dropout1, dropout2, dropout3, dropout4):
        super(Net, self).__init__()
        self.layer1 = nn.Linear(input_dim,hidden_layer1)
        self.layer2 = nn.Linear(hidden_layer1,hidden_layer2)
        self.layer3 = nn.Linear(hidden_layer2,hidden_layer3)
        self.layer4 = nn.Linear(hidden_layer3,hidden_layer4)
        self.layer5 = nn.Linear(hidden_layer4,output_dim)

        self.dropout1 = nn.Dropout(dropout1)
        self.dropout2 = nn.Dropout(dropout2)
        self.dropout3 = nn.Dropout(dropout3)
        self.dropout4 = nn.Dropout(dropout4)

        # self.relu = nn.ReLU()

    def forward(self, x):
        x = self.layer1(x)
        x = F.relu(x)
        x = self.dropout1(x)

        x = self.layer2(x)
        x = F.relu(x)
        x = self.dropout2(x)

        x = self.layer3(x)
        x = F.relu(x)
        x = self.dropout3(x)

        x = self.layer4(x)
        x = F.relu(x)
        x = self.dropout4(x)

        x = self.layer5(x)
        return x

 #定义模型评估参数
class Metrics:
    def __init__(self, net, dataloader):
        dataset = dataloader.dataset
        self.features = dataset[:][0]
        self.labels = dataset[:][1]
        # 将预测值限定在 1 到正无穷之间，以避免取对数时出现负值
        self.y_hat = torch.clamp(net(self.features), 1, float('inf'))
    
    def rmse(self):
        """计算 RMSE"""
        return torch.sqrt(F.mse_loss(self.y_hat, self.labels))
    
    def mae(self):
        """计算 MAE"""
        return F.l1_loss(self.y_hat, self.labels)
    
    def smooth_mae(self):
        """计算 Smooth L1 Loss (平滑 MAE)"""
        return F.smooth_l1_loss(self.y_hat, self.labels)
    
    def r2(self):
        """计算 R² (决定系数)"""
        SS_res = torch.sum(torch.square(self.labels - self.y_hat))
        SS_tot = torch.sum(torch.square(self.labels - torch.mean(self.labels)))
        r2 = 1 - SS_res / SS_tot
        return r2
    
    def mape(self):
        """计算 MAPE (平均绝对百分比误差)"""
        return torch.mean(torch.abs((self.labels - self.y_hat) / self.labels)) * 100
    
    def medae(self):
        """计算 MedAE (中位绝对误差)"""
        return torch.median(torch.abs(self.labels - self.y_hat))
    
# #初始化权重
def init_weights(m):
  if type(m) == nn.Linear:
    nn.init.normal_(m.weight, std=0.01)

#数据集特征与标签合并
def DataConcat(Xtrain, Xtest, ytrain, ytest):
    train_df = [Xtrain, ytrain]
    test_df = [Xtest, ytest]
    train_data = pd.concat(train_df,axis=1)
    test_data = pd.concat(test_df,axis=1)
    return train_data, test_data

#定义训练函数,用Adam优化器训练
from torch.optim.lr_scheduler import StepLR

def train(net, dataloader, loss, num_epochs, lr, wd):
    net.train()
    
    #train_data[:][0]可以获取train_data中的特征，[1]获取标签

    # 这里使用的是Adam优化算法
    optimizer = torch.optim.Adam(net.parameters(), lr = lr, weight_decay = wd)
    #每隔一个step_size,学习率乘以gamma
    scheduler = StepLR(optimizer, step_size=num_epochs/3, gamma=0.3)

    for epoch in range(num_epochs):
        for X, y in dataloader:
            optimizer.zero_grad()
            
            l = loss(net(X), y) 
            l.backward()
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()

            # with torch.no_grad():
            #     print(l)

#模型评估
def NetEval(net, dataloader, num_epochs, loss, lr, wd):
    # 模型评估指标矩阵
    rmse, mae, r2, mape, medae = [], [], [], [], []

    # 模型训练过程
    for epoch in range(num_epochs):
        net.train()
        train(net, dataloader, loss, num_epochs, lr, wd)

        net.eval()
        test_metrics = Metrics(net, dataloader)

        # 计算各项指标并存储
        rmse.append(test_metrics.rmse().detach().item())
        mae.append(test_metrics.mae().detach().item())
        r2.append(test_metrics.r2().detach().item())
        mape.append(test_metrics.mape().detach().item())  # 计算 MAPE
        medae.append(test_metrics.medae().detach().item())  # 计算 MedAE

    # 返回各项指标
    return r2, mae, rmse, mape, medae

In [58]:
#数据集预处理
Path = "FEM_data.csv"

seed = 0
trainsize, testsize = 0.5,0.5

X, y = DataProcess(Path)
Xtrain, Xtest, ytrain, ytest = DataSplit(X,y, testsize, seed)

In [59]:
#RF拟合
random_seed(0)
regressor = RandomForestRegressor(n_estimators = 100, max_depth=5)
# regressor = RandomForestRegressor(n_estimators = 100, max_depth=3)
# regressor = RandomForestRegressor(n_estimators = 140, max_depth=7)

regressor = MultiOutputRegressor(regressor)
regressor.fit(Xtrain, ytrain)

s = "./未迁移结果汇总/RF{}-{}.csv".format(int(trainsize*10), int(testsize*10))
ToCsv(regressor, Xtest, ytest, s)

#RF评估
RF_scores = model_score(regressor, X, y, trainsize, testsize)

# 提取各个指标的值
R2_values = RF_scores['r2']
mae_values = RF_scores['mae']
rmse_values = RF_scores['rmse']
mape_values = RF_scores['mape']
medae_values = RF_scores['medae']

# 计算各个指标的平均值
RF_R2_mean = np.mean(R2_values)                               # 平均 R²
RF_mae_mean = np.mean(mae_values)                             # 平均 MAE
RF_rmse_mean = np.mean(rmse_values)                           # 平均 RMSE
RF_mape_mean = np.mean(mape_values)                           # 平均 MAPE
RF_medae_mean = np.mean(medae_values)                         # 平均 MedAE

# 输出各个指标的平均值
print("RF 模型的评估结果：")
print(f"RF_R²: {RF_R2_mean}")
print(f"RF_MAE: {RF_mae_mean}")
print(f"RF_RMSE: {RF_rmse_mean}")
print(f"RF_MAPE: {RF_mape_mean}")
print(f"RF_MedAE: {RF_medae_mean}")

RF 模型的评估结果：
RF_R²: 0.9096951559793617
RF_MAE: 3.2081939678947387
RF_RMSE: 5.1324329579849675
RF_MAPE: 6.555217617073993
RF_MedAE: 2.6164573759253376


In [60]:
#SVM拟合
random_seed(0)
# regressor = SVR(C=100)
# regressor = SVR(C=5,gamma=1.5)
regressor = SVR(C=2)
regressor = MultiOutputRegressor(regressor)
regressor.fit(Xtrain, ytrain)

s = "./未迁移结果汇总/SVM{}-{}.csv".format(int(trainsize*10), int(testsize*10))
ToCsv(regressor, Xtest, ytest, s)

#SVM评估
SVM_scores = model_score(regressor, X, y, trainsize, testsize)

# 提取各个指标的值
R2_values = SVM_scores['r2']
mae_values = SVM_scores['mae']
rmse_values = SVM_scores['rmse']
mape_values = SVM_scores['mape']
medae_values = SVM_scores['medae']

# 计算各个指标的平均值
SVM_R2_mean = np.mean(R2_values)                               # 平均 R²
SVM_mae_mean = np.mean(mae_values)                             # 平均 MAE
SVM_rmse_mean = np.mean(rmse_values)                           # 平均 RMSE
SVM_mape_mean = np.mean(mape_values)                           # 平均 MAPE
SVM_medae_mean = np.mean(medae_values)                         # 平均 MedAE

# 输出各个指标的平均值
print("SVM 模型的评估结果：")
print(f"SVM_R²: {SVM_R2_mean}")
print(f"SVM_MAE: {SVM_mae_mean}")
print(f"SVM_RMSE: {SVM_rmse_mean}")
print(f"SVM_MAPE: {SVM_mape_mean}")
print(f"SVM_MedAE: {SVM_medae_mean}")

SVM 模型的评估结果：
SVM_R²: 0.8861336001482977
SVM_MAE: 3.4649654977778526
SVM_RMSE: 5.7649047092987615
SVM_MAPE: 6.806182628830852
SVM_MedAE: 2.7214238294143227


In [61]:
#数据预处理
# Xtrain, Xtest, ytrain, ytest
#将tensor转化为dataset对象
train_dataset = ToDataset(Df2Tensor(Xtrain), Df2Tensor(ytrain))
test_dataset = ToDataset(Df2Tensor(Xtest), Df2Tensor(ytest))

batchsize = 54
train_dataloader = ToDataLoader(train_dataset, batchsize)
test_dataloader = ToDataLoader(test_dataset, batchsize)


#定义超参数与网络
input_dim, output_dim, hidden_layer1, hidden_layer2, hidden_layer3, hidden_layer4 = 5, 3, 120,60,30,15
# num_epochs, lr, weight_decay, batch_size = 3000, 0.01, 0.002, 54
num_epochs, lr, wd, batch_size = 1000, 0.01, 0, 54
dropout1, dropout2, dropout3, dropout4 = 0,0,0,0

loss = nn.MSELoss()

net = Net(input_dim, output_dim, 
            hidden_layer1, hidden_layer2, hidden_layer3, hidden_layer4,
            dropout1, dropout2, dropout3, dropout4)
net.apply(init_weights)


Net(
  (layer1): Linear(in_features=5, out_features=120, bias=True)
  (layer2): Linear(in_features=120, out_features=60, bias=True)
  (layer3): Linear(in_features=60, out_features=30, bias=True)
  (layer4): Linear(in_features=30, out_features=15, bias=True)
  (layer5): Linear(in_features=15, out_features=3, bias=True)
  (dropout1): Dropout(p=0, inplace=False)
  (dropout2): Dropout(p=0, inplace=False)
  (dropout3): Dropout(p=0, inplace=False)
  (dropout4): Dropout(p=0, inplace=False)
)

In [62]:
#训练网络
train(net, train_dataloader, loss, num_epochs, lr, wd)

In [63]:
#保存数据
def ToCsv(model, Xtest, ytest, ypredict, filename):
    ytest = pd.DataFrame(ytest, index=[np.arange(len(ytest))], columns=['yreal1', 'yreal2', 'yreal3'])

    ypredict = pd.DataFrame(ypredict, index=[np.arange(len(ytest))], columns=['ypredict1', 'ypredict2', 'ypredict3'])
    # ypredict
    output = pd.concat([ytest, ypredict], axis=1)
    output.to_csv(filename)
    
y_predict = net(Df2Tensor(Xtest)).detach().numpy()
y_test = Df2Tensor(ytest).detach().numpy()

s = './未迁移结果汇总/DNN{:d}-{:d}.csv'.format(int(trainsize*10), int(testsize*10))
ToCsv(net,Xtest,y_test,y_predict,s)

In [64]:
#模型评估
eval_epochs = 10
wd = 0
r2, mae, rmse, mape, medae = NetEval(net, test_dataloader, eval_epochs, loss, lr, wd)

# 计算各个指标的平均值
DTNN_R2_mean = np.mean(r2)                               # 平均 R²
DTNN_mae_mean = np.mean(mae)                             # 平均 MAE
DTNN_rmse_mean = np.mean(rmse)                           # 平均 RMSE
DTNN_mape_mean = np.mean(mape)                           # 平均 MAPE
DTNN_medae_mean = np.mean(medae)                         # 平均 MedAE

# 输出各个指标的平均值
print("DTNN 模型的评估结果：")
print(f"DTNN_R²: {DTNN_R2_mean}")
print(f"DTNN_MAE: {DTNN_mae_mean}")
print(f"DTNN_RMSE: {DTNN_rmse_mean}")
print(f"DTNN_MAPE: {DTNN_mape_mean}")
print(f"DTNN_MedAE: {DTNN_medae_mean}")

DTNN 模型的评估结果：
DTNN_R²: 0.9889209866523743
DTNN_MAE: 2.2735061168670656
DTNN_RMSE: 3.353835201263428
DTNN_MAPE: 4.740410852432251
DTNN_MedAE: 1.2910142898559571


In [65]:
all_evals = [
    [RF_R2_mean, RF_mae_mean, RF_rmse_mean, RF_mape_mean, RF_medae_mean],
    [SVM_R2_mean, SVM_mae_mean, SVM_rmse_mean, SVM_mape_mean, SVM_medae_mean],
    [DTNN_R2_mean, DTNN_mae_mean, DTNN_rmse_mean, DTNN_mape_mean, DTNN_medae_mean]
]

df = pd.DataFrame(all_evals, columns=[
    'R2', 'MAE', 'RMSE', 'MAPE', 'MEDAE'
], index=['RF', 'SVM', 'DTNN'], dtype=float)
df.to_csv("./未迁移结果汇总/未迁移评估指标汇总{}-{}.csv".format(int(trainsize*10), int(testsize*10)))