In [34]:
#机器学习包
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split 
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import make_scorer, explained_variance_score, median_absolute_error
from sklearn.model_selection import ShuffleSplit
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR 
from sklearn.neighbors import KNeighborsRegressor
import time

#神经网络包
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset,TensorDataset,DataLoader
from torch.utils.data import random_split
import matplotlib.pyplot as plt
import random, os

In [35]:
#保证数据可复现
#设置全局随机种子，以确保代码运行时的随机性得到控制，从而保证数据可复现性
def random_seed(seed):
      # 控制 Python 内置的随机数生成器
    random.seed(seed)

    # 控制 Python 内部的哈希函数随机化，以确保字符串的哈希值是可复现的
    os.environ['PYTHONHASHSEED'] = str(seed)

    # 控制 NumPy 的随机数生成器
    np.random.seed(seed)

    # 控制 PyTorch 的 CPU 随机数生成器
    torch.manual_seed(seed)

    # 控制 PyTorch 在使用单个 GPU 时的随机性
    torch.cuda.manual_seed(seed)

    # 控制 PyTorch 在使用多个 GPU 时的随机性
    torch.cuda.manual_seed_all(seed)

    # 确保 PyTorch 的计算是确定性的，禁用某些非确定性算法
    torch.backends.cudnn.deterministic = True

#自定义函数
#定义函数
#标准化数据
#这个函数用于对特征数据进行标准化处理
#（即将数据转换为均值为 0、方差为 1 的标准正态分布），并返回标准化后的数据和标签的组合。
def ss(features, labels):
    from sklearn.preprocessing import StandardScaler
    scaler = StandardScaler()   # 使用标准化工具
    X_s = scaler.fit_transform(features)  # 对特征进行标准化
    X_s = pd.DataFrame(X_s)  # 转换为 DataFrame
    data = pd.concat([X_s, labels], axis=1)  # 将标准化后的特征与标签拼接
    return data

# MAPE 计算函数
def mape_scorer(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


def model_score(model, x, y, trainsize, testsize):
    # 创建交叉验证生成器
    cv = ShuffleSplit(n_splits=10, train_size=trainsize, test_size=testsize, random_state=0)

    # 计算 RMSE
    rmse = cross_val_score(model, x, y, scoring="neg_mean_squared_error", cv=cv)
    rmse_score = np.sqrt(-rmse)
    rmse_mean = rmse_score.mean()

    # 计算 MAE
    mae = cross_val_score(model, x, y, scoring="neg_mean_absolute_error", cv=cv)
    mae_score = -mae
    mae_mean = mae_score.mean()

    # 计算 R²
    r2 = cross_val_score(model, x, y, scoring='r2', cv=cv)
    r2_mean = r2.mean()

    # 计算 MAPE
    mape = cross_val_score(model, x, y, scoring=make_scorer(mape_scorer, greater_is_better=False), cv=cv)
    mape_score = -mape
    mape_mean = mape_score.mean()

    # 计算 MedAE
    medae = cross_val_score(model, x, y, scoring=make_scorer(median_absolute_error, greater_is_better=False), cv=cv)
    medae_score = -medae
    medae_mean = medae_score.mean()

    # 将所有评分结果整合
    scores = [
        rmse_score, rmse_mean, mae_score, mae_mean, r2, r2_mean, mape_score, mape_mean, medae_score, medae_mean]
    
    # 创建各指标的 DataFrame
    rmse_df = pd.DataFrame(scores[0], columns=['rmse'], index=np.arange(len(scores[0])))
    mae_df = pd.DataFrame(scores[2], columns=['mae'], index=np.arange(len(scores[2])))
    r2_df = pd.DataFrame(scores[4], columns=['r2'], index=np.arange(len(scores[4])))
    mape_df = pd.DataFrame(scores[6], columns=['mape'], index=np.arange(len(scores[6])))
    medae_df = pd.DataFrame(scores[8], columns=['medae'], index=np.arange(len(scores[8])))

    # 合并所有得分结果
    scores_df = pd.concat([rmse_df, mae_df, r2_df, mape_df, medae_df], axis=1)

    return scores_df



#导出预测值到csv
#此函数将真实值和预测值写入到 CSV 文件中。
def ToCsv(model, Xtest, ytest, filename):
    ytest = pd.DataFrame(ytest.values, index=[np.arange(len(ytest))], columns=['yreal1', 'yreal2', 'yreal3'])
    ypredict = model.predict(Xtest)
    ypredict = pd.DataFrame(ypredict, index=[np.arange(len(ytest))], columns=['ypredict1', 'ypredict2', 'ypredict3'])
    # ypredict
    # 合并真实值和预测值
    output = pd.concat([ytest, ypredict], axis=1)
    output.to_csv(filename)

#数据导入＋预处理
#此函数从 CSV 文件读取数据，并对其进行标准化处理，最后返回特征和标签
def DataProcess(path):
    data = pd.read_csv(path)
    data_df = pd.DataFrame(data)
    # 提取特征和标签
    X = data_df.iloc[:,1:6]
    y = data_df.iloc[:,6:9]
    # # 标准化处理
    # data_s = ss(X_df,y_df)
    # X = data_df.iloc[:,0:5]
    # y = data_df.iloc[:,5:]
    return X, y

def DataProcess1(path):
    data = pd.read_csv(path)
    data_df = pd.DataFrame(data)
    # 提取特征和标签
    X_df = data_df.iloc[:,1:6]
    y_df = data_df.iloc[:,6:9]
    # 标准化处理
    data_s = ss(X_df,y_df)
    X = data_s.iloc[:,0:5]
    y = data_s.iloc[:,5:]
    return X, y

#数据集拆分
def DataSplit(X, y, trainsize, testsize):
    # 计算训练集的样本数量
    n_train = int(len(X) * trainsize)
    
    # 将前面部分作为训练集，后面部分作为测试集
    Xtrain, Xtest = X.iloc[:n_train], X.iloc[n_train:]
    ytrain, ytest = y.iloc[:n_train], y.iloc[n_train:]
    
    return Xtrain, Xtest, ytrain, ytest

#此函数将特征和标签数据集进行训练集和测试集的划分，并使用固定的随机种子来保证可复现性。
def DataSplit1(X,y, testsize, seed):
    random_seed(seed)
    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=testsize)
    return Xtrain, Xtest, ytrain, ytest


In [36]:
#DNN模型、函数

#神经网络数据处理三步：
#1.加载数据，提取出feature和label，并转换成tensor
#2.传入TensorDataset中，实例化TensorDataset为datsset
#3.再将dataset传入到Dataloader中，最后通过enumerate输出我们想要的经过shuffle的bachsize大小的feature和label数据
'''数据预处理'''
#dataframe转换为tensor
#将DataFrame数据转换为PyTorch张量。
def Df2Tensor(df):
    array = np.array(df)
    tensor = torch.tensor(array, dtype=torch.float32)
    return tensor
#将张量打包成TensorDataset，供模型使用。
def ToDataset(*args):
    return TensorDataset(*args)

#dataset变迭代器
#将数据集转为可迭代的DataLoader，方便进行批量训练。
def ToDataLoader(dataset, batchsize):
    return DataLoader(dataset, batchsize, shuffle=True)


'''定义网络结构'''
#Net类定义了一个拥有四层隐藏层的神经网络，并在每一层之间应用了Dropout正则化。ReLU函数用于激活。
#nn.Linear（全连接层）、激活函数如ReLU、Sigmoid，以及正则化层如Dropout
class Net(nn.Module):
    def __init__(self, 
            input_dim, output_dim, 
            hidden_layer1, hidden_layer2, hidden_layer3, hidden_layer4, 
            dropout1, dropout2, dropout3, dropout4):
        super(Net, self).__init__()
        self.layer1 = nn.Linear(input_dim,hidden_layer1)
        self.layer2 = nn.Linear(hidden_layer1,hidden_layer2)
        self.layer3 = nn.Linear(hidden_layer2,hidden_layer3)
        self.layer4 = nn.Linear(hidden_layer3,hidden_layer4)
        self.layer5 = nn.Linear(hidden_layer4,output_dim)

        self.dropout1 = nn.Dropout(dropout1)
        self.dropout2 = nn.Dropout(dropout2)
        self.dropout3 = nn.Dropout(dropout3)
        self.dropout4 = nn.Dropout(dropout4)

        # self.relu = nn.ReLU()
#前向传播：输入数据经过模型进行前向计算，输出预测值。
    def forward(self, x):
        x = self.layer1(x)
        x = F.relu(x)
        x = self.dropout1(x)

        x = self.layer2(x)
        x = F.relu(x)
        x = self.dropout2(x)

        x = self.layer3(x)
        x = F.relu(x)
        x = self.dropout3(x)

        x = self.layer4(x)
        x = F.relu(x)
        x = self.dropout4(x)

        x = self.layer5(x)
        return x

'''训练与评估'''
 #定义模型评估参数
 #Metrics类：定义了常见的评估指标，包括rmse（均方根误差）、mae（平均绝对误差）和r2（R²决定系数）
class Metrics:
    def __init__(self, net, dataloader):
        dataset = dataloader.dataset
        self.features = dataset[:][0]
        self.labels = dataset[:][1]
        # 将预测值限定在 1 到正无穷之间，以避免取对数时出现负值
        self.y_hat = torch.clamp(net(self.features), 1, float('inf'))
    
    def rmse(self):
        """计算 RMSE"""
        return torch.sqrt(F.mse_loss(self.y_hat, self.labels))
    
    def mae(self):
        """计算 MAE"""
        return F.l1_loss(self.y_hat, self.labels)
    
    def smooth_mae(self):
        """计算 Smooth L1 Loss (平滑 MAE)"""
        return F.smooth_l1_loss(self.y_hat, self.labels)
    
    def r2(self):
        """计算 R² (决定系数)"""
        SS_res = torch.sum(torch.square(self.labels - self.y_hat))
        SS_tot = torch.sum(torch.square(self.labels - torch.mean(self.labels)))
        r2 = 1 - SS_res / SS_tot
        return r2
    
    def mape(self):
        """计算 MAPE (平均绝对百分比误差)"""
        return torch.mean(torch.abs((self.labels - self.y_hat) / self.labels)) * 100
    
    def medae(self):
        """计算 MedAE (中位绝对误差)"""
        return torch.median(torch.abs(self.labels - self.y_hat))



# #初始化权重
def init_weights(m):
  if type(m) == nn.Linear:
    nn.init.normal_(m.weight, std=0.01)

#数据集特征与标签合并
def DataConcat(Xtrain, Xtest, ytrain, ytest):
    train_df = [Xtrain, ytrain]
    test_df = [Xtest, ytest]
    train_data = pd.concat(train_df,axis=1)
    test_data = pd.concat(test_df,axis=1)
    return train_data, test_data

#定义训练函数,用Adam优化器训练
from torch.optim.lr_scheduler import StepLR

#train：使用Adam优化器训练模型，并引入了学习率调度器StepLR，控制学习率逐渐衰减

#前向传播：输入数据经过模型进行前向计算，输出预测值。
#计算损失：将预测值与真实标签计算损失值。
#反向传播：通过loss.backward()计算梯度。
#优化更新：调用optimizer.step()更新模型参数，之后用optimizer.zero_grad()清空梯度。
#重复以上步骤：遍历整个数据集若干次（称为epoch），逐步优化模型。
def train(net, dataloader, loss, num_epochs, lr, wd):
    net.train()
    
    #train_data[:][0]可以获取train_data中的特征，[1]获取标签

    # 这里使用的是Adam优化算法
    optimizer = torch.optim.Adam(net.parameters(), lr = lr, weight_decay = wd)
    #每隔一个step_size,学习率乘以gamma
    scheduler = StepLR(optimizer, step_size=num_epochs/3, gamma=0.3)
#遍历整个数据集若干次（称为epoch)
    for epoch in range(num_epochs):
        for X, y in dataloader:
            optimizer.zero_grad()
            
            l = loss(net(X), y) 
            l.backward()
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()

            # with torch.no_grad():
            #     print(l)

#模型评估
def NetEval(net, dataloader, num_epochs, loss, lr, wd):
    # 模型评估指标矩阵
    rmse, mae, r2, mape, medae = [], [], [], [], []

    # 模型训练过程
    for epoch in range(num_epochs):
        net.train()
        train(net, dataloader, loss, num_epochs, lr, wd)

        net.eval()
        test_metrics = Metrics(net, dataloader)

        # 计算各项指标并存储
        rmse.append(test_metrics.rmse().detach().item())
        mae.append(test_metrics.mae().detach().item())
        r2.append(test_metrics.r2().detach().item())
        mape.append(test_metrics.mape().detach().item())  # 计算 MAPE
        medae.append(test_metrics.medae().detach().item())  # 计算 MedAE

    # 返回各项指标
    return r2, mae, rmse, mape, medae



In [39]:
# 数据集预处理
Path = "FEA——train0.1.csv"

seed = 0
trainsize, testsize = 0.1, 0.9


# 加载并划分数据集
X, y = DataProcess(Path)
Xtrain, Xtest, ytrain, ytest = DataSplit(X, y, trainsize, testsize)

# 打印训练集的 X 和 y
print("训练集特征 (Xtrain):")
print(Xtrain)
print("\n训练集标签 (ytrain):")
print(ytrain)

# 数据预处理
# 将 tensor 转化为 dataset 对象
train_dataset = ToDataset(Df2Tensor(Xtrain), Df2Tensor(ytrain))
test_dataset = ToDataset(Df2Tensor(Xtest), Df2Tensor(ytest))

# 设置 batch size
batchsize = 54
train_dataloader = ToDataLoader(train_dataset, batchsize)
test_dataloader = ToDataLoader(test_dataset, batchsize)


训练集特征 (Xtrain):
    槽开口宽度Bs0（mm）  永磁体轴向长度/动子极距            a  气隙厚度gap（mm）  线圈径向宽度rcoil（mm）  \
0       0.709952                   0.447214     1.341641         1.224745   
1       0.709952                  -0.447214    -1.341641         1.224745   
2       0.709952                   0.447214    -1.341641         1.224745   
3       0.709952                   1.341641     1.341641        -1.224745   
4       0.709952                   0.447214    -1.341641        -1.224745   
..           ...                        ...          ...              ...   
67      0.283981                   0.447214    -1.341641         1.224745   
68      0.709952                   0.447214     1.341641        -1.224745   
69      0.709952                  -0.447214    -1.341641         1.224745   
70      0.283981                   0.447214     1.341641         1.224745   
71      0.283981                   0.447214     1.341641        -1.224745   

    永磁体径向长度rpm（mm）  
0          0.29277  
1         -0.8783

In [27]:
#导入解析法数据
AM_Path = 'AM_data.csv'
seed = 0
trainsize1, testsize1 = 0.99,0.01
X, y = DataProcess1(AM_Path)
Xtrain1, Xtest1, ytrain1, ytest1 = DataSplit1(X,y, testsize1, seed)

#数据预处理
# Xtrain, Xtest, ytrain, ytest
#将tensor转化为dataset对象
train_dataset1 = ToDataset(Df2Tensor(Xtrain1), Df2Tensor(ytrain1))
test_dataset1 = ToDataset(Df2Tensor(Xtest1), Df2Tensor(ytest1))

batchsize = 54
train_dataloader1 = ToDataLoader(train_dataset1, batchsize)
test_dataloader1= ToDataLoader(test_dataset1, batchsize)

# for i,d in enumerate(train_dataloader):
#     X,y = d
#     print(y)

In [28]:
#定义超参数与网络
input_dim, output_dim, hidden_layer1, hidden_layer2, hidden_layer3, hidden_layer4 = 5, 3, 120,60,30,15
# num_epochs, lr, weight_decay, batch_size = 3000, 0.01, 0.002, 54
num_epochs, lr, wd, batch_size = 1000, 0.003, 0, 54
dropout1, dropout2, dropout3, dropout4 = 0,0.01,0.01,0.01

loss = nn.MSELoss()




net = Net(input_dim, output_dim, 
            hidden_layer1, hidden_layer2, hidden_layer3, hidden_layer4,
            dropout1, dropout2, dropout3, dropout4)
net.apply(init_weights)

Net(
  (layer1): Linear(in_features=5, out_features=120, bias=True)
  (layer2): Linear(in_features=120, out_features=60, bias=True)
  (layer3): Linear(in_features=60, out_features=30, bias=True)
  (layer4): Linear(in_features=30, out_features=15, bias=True)
  (layer5): Linear(in_features=15, out_features=3, bias=True)
  (dropout1): Dropout(p=0, inplace=False)
  (dropout2): Dropout(p=0.01, inplace=False)
  (dropout3): Dropout(p=0.01, inplace=False)
  (dropout4): Dropout(p=0.01, inplace=False)
)

In [29]:
#解析法数据训练网络
train(net, train_dataloader1, loss, num_epochs, lr, wd)

In [30]:
##FEA数据训练网络
train(net, train_dataloader, loss, num_epochs, lr, wd)

In [None]:
# 模型评估
eval_epochs = 10
wd = 0
r2, mae, rmse, mape, medae = NetEval(net, test_dataloader, eval_epochs, loss, lr, wd)

eval_epochs = 10
lr1 = lr/2
r2, mae, rmse, mape, medae = NetEval(net, test_dataloader, eval_epochs, loss, lr1, wd)

eval_epochs = 10
lr1 = lr/2
r2, mae, rmse, mape, medae = NetEval(net, test_dataloader, eval_epochs, loss, lr1, wd)

# 计算各个指标的平均值
DTNN_R2_mean = np.mean(r2)                               # 平均 R²
DTNN_mae_mean = np.mean(mae)                             # 平均 MAE
DTNN_rmse_mean = np.mean(rmse)                           # 平均 RMSE
DTNN_mape_mean = np.mean(mape)                           # 平均 MAPE
DTNN_medae_mean = np.mean(medae)                         # 平均 MedAE

# 输出各个指标的平均值
print("DTNN 模型的评估结果：")
print(f"DTNN_R²: {DTNN_R2_mean}")
print(f"DTNN_MAE: {DTNN_mae_mean}")
print(f"DTNN_RMSE: {DTNN_rmse_mean}")
print(f"DTNN_MAPE: {DTNN_mape_mean}")
print(f"DTNN_MedAE: {DTNN_medae_mean}")



DTNN 模型的评估结果：
DTNN_R²: 0.9904209494590759
DTNN_MAE: 2.1184255599975588
DTNN_RMSE: 3.095784401893616
DTNN_MAPE: 4.857172727584839
DTNN_MedAE: 1.2506858825683593


In [32]:
#保存数据
def ToCsv(model, Xtest, ytest, ypredict, filename):
    ytest = pd.DataFrame(ytest, index=[np.arange(len(ytest))], columns=['yreal1', 'yreal2', 'yreal3'])

    ypredict = pd.DataFrame(ypredict, index=[np.arange(len(ytest))], columns=['ypredict1', 'ypredict2', 'ypredict3'])
    # ypredict
    output = pd.concat([ytest, ypredict], axis=1)
    output.to_csv(filename)
    
y_predict = net(Df2Tensor(Xtest)).detach().numpy()
y_test = Df2Tensor(ytest).detach().numpy()

s = './样本挑选-迁移结果汇总/TL——DNN——Picking结果{}-{}.csv'.format(int(trainsize*10), int(testsize*10))
ToCsv(net,Xtest,y_test,y_predict,s)


In [33]:


all_evals = [[DTNN_R2_mean, DTNN_mae_mean, DTNN_rmse_mean, DTNN_mape_mean, DTNN_medae_mean]]

df = pd.DataFrame(all_evals, columns=[
    'R2', 'MAE', 'RMSE', 'MAPE','MEDAE'
], index=['DTNN'], dtype=float)

df.to_csv("./样本挑选-迁移结果汇总/TL——DNN——Picking评估指标汇总ce{}-{}.csv".format(int(trainsize*10), int(testsize*10)))