In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split 
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import ShuffleSplit
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR 
from sklearn.neighbors import KNeighborsRegressor


import time

import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset,TensorDataset,DataLoader
from torch.utils.data import random_split
import matplotlib.pyplot as plt
import random, os

In [None]:
#Ensure data reproducibility
def random_seed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and all CUDA devices), exports ``PYTHONHASHSEED`` and configures
    cuDNN for repeatable kernel selection.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Picked up by Python processes started after this point; the hash seed
    # of the already-running interpreter cannot be changed retroactively.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.deterministic = True
    # Autotuning may select a different convolution algorithm on each run,
    # which defeats the deterministic flag above, so switch it off as well.
    torch.backends.cudnn.benchmark = False

#User-defined helper functions
#Standardization
def ss(features, labels):
    """Standardize the features and append the unmodified labels.

    Args:
        features: 2-D table of feature columns; a ``StandardScaler`` is fitted
            on it and applied to it.
        labels: pandas object holding the targets, row-for-row with
            ``features``.

    Returns:
        DataFrame whose first columns are the scaled features (integer column
        names 0..k-1) followed by the label columns.
    """
    from sklearn.preprocessing import StandardScaler

    scaler = StandardScaler()
    X_s = scaler.fit_transform(features)
    # Give the scaled frame the labels' index. With a fresh 0..n-1 index the
    # column-wise concat below would align on index values and pad with NaN
    # whenever the labels carry any other index (e.g. a shuffled subset).
    X_s = pd.DataFrame(np.asarray(X_s), index=labels.index)
    data = pd.concat([X_s, labels], axis=1)
    return data

def model_score(model, x, y, trainsize, testsize):
    """Score a model with 10 random shuffle splits.

    All three metrics are computed in a single cross-validation pass, so every
    split is fitted once and RMSE, MAE and R2 in a given row describe the same
    fitted model.

    Args:
        model: scikit-learn estimator to evaluate.
        x: Feature table.
        y: Target table.
        trainsize: ``train_size`` forwarded to ``ShuffleSplit``.
        testsize: ``test_size`` forwarded to ``ShuffleSplit``.

    Returns:
        DataFrame with columns ``rmse``, ``mae`` and ``R2`` and one row per
        split.
    """
    from sklearn.model_selection import cross_validate

    cv = ShuffleSplit(n_splits=10, train_size=trainsize, test_size=testsize, random_state=0)
    scoring = {
        'mse': 'neg_mean_squared_error',
        'mae': 'neg_mean_absolute_error',
        'r2': 'r2',
    }
    results = cross_validate(model, x, y, scoring=scoring, cv=cv)

    # The "neg_*" scorers return negated errors; flip the sign back.
    rmse_score = np.sqrt(-results['test_mse'])
    mae_score = -results['test_mae']
    r2_score = results['test_r2']

    # Index built exactly as before (a list wrapping one arange) so the
    # returned frame keeps the shape existing callers already receive.
    rmse = pd.DataFrame(rmse_score, columns=['rmse'], index=[np.arange(len(rmse_score))])
    mae = pd.DataFrame(mae_score, columns=['mae'], index=[np.arange(len(mae_score))])
    R2 = pd.DataFrame(r2_score, columns=['R2'], index=[np.arange(len(r2_score))])
    scores_df = pd.concat([rmse, mae, R2], axis=1)
    return scores_df


def ToCsv(model, Xtest, ytest, filename):
    """Write true and predicted targets side by side to a CSV file.

    Works for any number of target columns: the true values are written as
    ``yreal1..yrealK`` and the predictions as ``ypredict1..ypredictK``.

    Args:
        model: Fitted estimator exposing ``predict``.
        Xtest: Features passed to ``model.predict``.
        ytest: True targets (DataFrame, Series or array), one row per sample.
        filename: Destination path of the CSV file.
    """
    y_true = np.asarray(ytest)
    y_pred = np.asarray(model.predict(Xtest))

    # Single-target data arrives 1-D; turn it into one column.
    if y_true.ndim == 1:
        y_true = y_true.reshape(-1, 1)
    if y_pred.ndim == 1:
        y_pred = y_pred.reshape(-1, 1)

    # Same positional index for both halves, so the concat is row-for-row.
    row_index = [np.arange(len(y_true))]
    real_cols = ['yreal{}'.format(i) for i in range(1, y_true.shape[1] + 1)]
    pred_cols = ['ypredict{}'.format(i) for i in range(1, y_pred.shape[1] + 1)]

    real_df = pd.DataFrame(y_true, index=row_index, columns=real_cols)
    pred_df = pd.DataFrame(y_pred, index=row_index, columns=pred_cols)
    output = pd.concat([real_df, pred_df], axis=1)
    output.to_csv(filename)

def DataProcess(path):
    """Read a CSV file and split it into scaled features and targets.

    Columns at positions 1-5 are treated as features and positions 6-8 as
    targets. Both are passed through ``ss`` and the combined frame it returns
    is cut back into a feature part and a target part.

    Args:
        path: Location of the CSV file.

    Returns:
        Tuple ``(X, y)``: the first five columns of the frame returned by
        ``ss`` and all remaining columns.
    """
    raw = pd.DataFrame(pd.read_csv(path))
    scaled = ss(raw.iloc[:, 1:6], raw.iloc[:, 6:9])
    return scaled.iloc[:, 0:5], scaled.iloc[:, 5:]

def DataSplit(X,y, testsize, seed):
    """Split features and targets into reproducible train and test parts.

    Args:
        X: Feature table.
        y: Target table, row-for-row with ``X``.
        testsize: ``test_size`` forwarded to ``train_test_split``.
        seed: Integer used to seed the global generators and the split itself.

    Returns:
        Tuple ``(Xtrain, Xtest, ytrain, ytest)``.
    """
    random_seed(seed)
    # Pass the seed explicitly as well, so the split does not depend on
    # nothing else having drawn from NumPy's global generator in between.
    Xtrain, Xtest, ytrain, ytest = train_test_split(
        X, y, test_size=testsize, random_state=seed
    )
    return Xtrain, Xtest, ytrain, ytest

In [None]:
#TL-DNN model

#dataframe to tensor
def Df2Tensor(df):
    """Convert a DataFrame (or any array-like) to a float32 tensor.

    The input is first copied into a NumPy array and then into a new tensor.
    """
    return torch.tensor(np.array(df), dtype=torch.float32)

def ToDataset(*args):
    """Bundle the given tensors into a single ``TensorDataset``."""
    tensors = args
    return TensorDataset(*tensors)

def ToDataLoader(dataset, batchsize):
    """Wrap a dataset in a shuffling ``DataLoader``.

    Args:
        dataset: Dataset to iterate over.
        batchsize: Number of samples per batch.
    """
    loader = DataLoader(dataset, batch_size=batchsize, shuffle=True)
    return loader


#Define the network structure
class Net(nn.Module):
    """Fully connected network with four hidden layers.

    Each hidden layer is a linear map followed by ReLU and dropout; the fifth
    (output) layer is a plain linear map. Sub-modules are exposed as
    ``layer1``..``layer5`` and ``dropout1``..``dropout4``.
    """

    def __init__(self,
            input_dim, output_dim,
            hidden_layer1, hidden_layer2, hidden_layer3, hidden_layer4,
            dropout1, dropout2, dropout3, dropout4):
        super().__init__()
        widths = (input_dim,
                  hidden_layer1, hidden_layer2, hidden_layer3, hidden_layer4,
                  output_dim)
        # Create layer1..layer5 first and the dropouts afterwards, so the
        # registration order of the sub-modules stays layer*, then dropout*.
        for idx, (fan_in, fan_out) in enumerate(zip(widths[:-1], widths[1:]), start=1):
            setattr(self, 'layer{}'.format(idx), nn.Linear(fan_in, fan_out))
        for idx, rate in enumerate((dropout1, dropout2, dropout3, dropout4), start=1):
            setattr(self, 'dropout{}'.format(idx), nn.Dropout(rate))

    def forward(self, x):
        """Run ``x`` through the four hidden stages and the output layer."""
        for idx in range(1, 5):
            linear = getattr(self, 'layer{}'.format(idx))
            drop = getattr(self, 'dropout{}'.format(idx))
            x = drop(F.relu(linear(x)))
        return self.layer5(x)

#Define model evaluation metrics
class Metrics():
    """Regression metrics of a network over an entire dataset.

    The whole dataset behind ``dataloader`` is pushed through ``net`` once at
    construction time; every metric method then compares those stored
    predictions with the stored labels.
    """

    def __init__(self, net, dataloader):
        """Compute and cache predictions.

        Args:
            net: Callable model mapping a feature tensor to predictions. The
                caller is responsible for putting it into eval mode.
            dataloader: Loader whose ``dataset`` supports ``dataset[:]`` and
                yields ``(features, labels)``.
        """
        samples = dataloader.dataset[:]
        self.features = samples[0]
        self.labels = samples[1]
        # Pure evaluation: skip autograd bookkeeping so no graph over the
        # full dataset is kept alive by the cached predictions.
        with torch.no_grad():
            # Predictions below 1 are raised to 1 (no upper bound).
            # NOTE(review): presumably the targets are never below 1 - confirm.
            self.y_hat = torch.clamp(net(self.features), 1, float('inf'))

    def rmse(self):
        """Root mean squared error over all label entries."""
        return torch.sqrt(F.mse_loss(self.y_hat, self.labels))

    def mae(self):
        """Mean absolute error over all label entries."""
        return F.l1_loss(self.y_hat, self.labels)

    def smooth_mae(self):
        """Smooth L1 loss over all label entries."""
        return F.smooth_l1_loss(self.y_hat, self.labels)

    def r2(self):
        """Coefficient of determination pooled over all label entries.

        The total sum of squares is taken around the mean of *all* label
        values, not per output column, so for multi-output labels this is not
        the per-output average that scikit-learn's ``r2`` scorer reports.
        """
        SS_res = torch.sum(torch.square(self.labels - self.y_hat))
        SS_tot = torch.sum(torch.square(self.labels - torch.mean(self.labels)))
        return 1 - SS_res / SS_tot

def init_weights(m):
    """Re-initialize the weights of plain ``nn.Linear`` modules.

    Intended for ``net.apply``: weights are drawn from a zero-mean normal
    distribution with standard deviation 0.01. Biases and every other module
    type are left untouched.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.normal_(m.weight, std=0.01)

def DataConcat(Xtrain, Xtest, ytrain, ytest):
    """Join features and targets column-wise for each split.

    Returns:
        Tuple ``(train_data, test_data)`` where each frame holds the feature
        columns followed by the target columns of that split.
    """
    train_data, test_data = (
        pd.concat([features, targets], axis=1)
        for features, targets in ((Xtrain, ytrain), (Xtest, ytest))
    )
    return train_data, test_data

from torch.optim.lr_scheduler import StepLR

def train(net, dataloader, loss, num_epochs, lr, wd):
    """Train ``net`` in place with Adam and a three-stage step decay.

    The learning rate is multiplied by 0.3 after every third of the run,
    counted in epochs.

    Args:
        net: Model to optimize; it is switched to training mode.
        dataloader: Iterable of ``(X, y)`` batches.
        loss: Callable ``loss(prediction, target)`` returning a scalar tensor.
        num_epochs: Number of passes over ``dataloader``.
        lr: Initial learning rate.
        wd: Weight decay passed to Adam.
    """
    net.train()

    optimizer = torch.optim.Adam(net.parameters(), lr=lr, weight_decay=wd)
    # StepLR expects an integer period. A float such as 1000 / 3 is never an
    # exact divisor of the step counter, so the decay would never trigger.
    step_size = max(1, num_epochs // 3)
    scheduler = StepLR(optimizer, step_size=step_size, gamma=0.3)

    for _ in range(num_epochs):
        for X, y in dataloader:
            optimizer.zero_grad()
            batch_loss = loss(net(X), y)
            batch_loss.backward()
            optimizer.step()
        # The period above is measured in epochs, so advance the schedule
        # once per epoch, not once per batch.
        scheduler.step()


#model evaluation
def NetEval(net, dataloader, num_epochs, loss, lr, wd):
    """Repeatedly train on ``dataloader`` and record metrics after each round.

    Runs ``num_epochs`` rounds. Every round calls ``train`` on ``dataloader``
    for ``num_epochs`` epochs and then computes ``Metrics`` on that same
    ``dataloader`` with the network in eval mode.

    NOTE(review): the scores are measured on the very data the network was
    just trained on, so they are not held-out estimates - confirm that this
    is intended before reporting them as test performance.

    Args:
        net: Model to train and score (modified in place).
        dataloader: Loader used for both the training and the scoring.
        num_epochs: Number of rounds, and number of epochs per round.
        loss: Loss callable forwarded to ``train``.
        lr: Learning rate forwarded to ``train``.
        wd: Weight decay forwarded to ``train``.

    Returns:
        Tuple ``(r2, mae, rmse)`` of lists with one float per round.
    """
    rmse, mae, r2 = [], [], []
    for _ in range(num_epochs):
        net.train()
        train(net, dataloader, loss, num_epochs, lr, wd)

        net.eval()
        # Scoring needs no gradients; avoid building a graph over the whole
        # dataset on every round.
        with torch.no_grad():
            round_metrics = Metrics(net, dataloader)
            rmse.append(round_metrics.rmse().item())
            mae.append(round_metrics.mae().item())
            r2.append(round_metrics.r2().item())
    return r2, mae, rmse

In [None]:
#Data set preprocessing
# Load the FEA table, split it and wrap both parts for PyTorch.
Path = "FEA_data.csv"

seed = 0
trainsize = 0.1
testsize = 0.9

X, y = DataProcess(Path)
Xtrain, Xtest, ytrain, ytest = DataSplit(X, y, testsize, seed)

#Data preprocessing
# Each split becomes a TensorDataset of float32 feature/target tensors.
train_dataset, test_dataset = (
    ToDataset(Df2Tensor(features), Df2Tensor(targets))
    for features, targets in ((Xtrain, ytrain), (Xtest, ytest))
)

batchsize = 54
train_dataloader, test_dataloader = (
    ToDataLoader(split, batchsize) for split in (train_dataset, test_dataset)
)


In [None]:
#Transfer-Learning
# Prepare the AM table the same way as the FEA table; these loaders feed
# the pre-training step.
AM_Path = 'AM_data.csv'
seed = 0
trainsize1 = 0.99
testsize1 = 0.01
# NOTE: X and y are rebound here, so from this cell on they hold the AM
# data and no longer the FEA data.
X, y = DataProcess(AM_Path)
Xtrain1, Xtest1, ytrain1, ytest1 = DataSplit(X, y, testsize1, seed)

train_dataset1, test_dataset1 = (
    ToDataset(Df2Tensor(features), Df2Tensor(targets))
    for features, targets in ((Xtrain1, ytrain1), (Xtest1, ytest1))
)

batchsize = 54
train_dataloader1, test_dataloader1 = (
    ToDataLoader(split, batchsize) for split in (train_dataset1, test_dataset1)
)


In [None]:
#Define hyperparameters and the network
# Layer widths: 5 inputs -> 120 -> 60 -> 30 -> 15 -> 3 outputs.
input_dim, output_dim = 5, 3
hidden_layer1, hidden_layer2, hidden_layer3, hidden_layer4 = 120, 60, 30, 15
# Optimization settings shared by the pre-training and training cells.
num_epochs, lr, wd, batch_size = 1000, 0.003, 0, 54
dropout1, dropout2, dropout3, dropout4 = 0, 0.01, 0.01, 0.01

loss = nn.MSELoss()

net = Net(
    input_dim=input_dim, output_dim=output_dim,
    hidden_layer1=hidden_layer1, hidden_layer2=hidden_layer2,
    hidden_layer3=hidden_layer3, hidden_layer4=hidden_layer4,
    dropout1=dropout1, dropout2=dropout2,
    dropout3=dropout3, dropout4=dropout4,
)
# Last expression of the cell: apply() returns the module, so its layout is
# displayed as the cell output.
net.apply(init_weights)

Net(
  (layer1): Linear(in_features=5, out_features=120, bias=True)
  (layer2): Linear(in_features=120, out_features=60, bias=True)
  (layer3): Linear(in_features=60, out_features=30, bias=True)
  (layer4): Linear(in_features=30, out_features=15, bias=True)
  (layer5): Linear(in_features=15, out_features=3, bias=True)
  (dropout1): Dropout(p=0, inplace=False)
  (dropout2): Dropout(p=0.01, inplace=False)
  (dropout3): Dropout(p=0.01, inplace=False)
  (dropout4): Dropout(p=0.01, inplace=False)
)

In [None]:
#Pre-training
# First stage: fit the freshly initialized network on the AM training loader.
train(net=net, dataloader=train_dataloader1, loss=loss,
      num_epochs=num_epochs, lr=lr, wd=wd)

In [None]:
#Train the network
# Second stage: continue training the same network on the FEA training loader.
train(net=net, dataloader=train_dataloader, loss=loss,
      num_epochs=num_epochs, lr=lr, wd=wd)

In [None]:
#Model evaluation
# NOTE(review): NetEval trains the network on the loader it is given before
# scoring it, so passing test_dataloader means the network is fitted on the
# test split - confirm this is intended.
eval_epochs = 10
wd = 0
r2, mae, rmse = NetEval(net=net, dataloader=test_dataloader,
                        num_epochs=eval_epochs, loss=loss, lr=lr, wd=wd)

# Average each metric over the evaluation rounds.
DTNN_r2_mean, DTNN_mae_mean, DTNN_rmse_mean = (
    np.mean(values) for values in (r2, mae, rmse)
)

print('The average R² of TL-DNN:{}'.format(DTNN_r2_mean))
print('the average MAE:{}'.format(DTNN_mae_mean))
print('the average RMSE:{}'.format(DTNN_rmse_mean))


DTNN R2交叉验证平均值:0.9834182858467102
DTNN mae交叉验证平均值:2.777275896072388
DTNN rmse交叉验证平均值:4.077270460128784
