In [11]:
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
import torch.nn.functional as F
from torch import optim
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm
from timeit import default_timer as timer
from torch.utils.data import DataLoader

In [12]:
import torch.nn as nn
import pytorch_lightning as pl
from pytorch_forecasting import RMSE,MAE,MAPE
from prettytable import PrettyTable

### x_train and y_train

In [13]:
# Load the pre-computed review tensors (x) and rating tensors (y) for the three
# splits. Load order matches the assignment order: train, test, val.
# NOTE: torch.load unpickles the file contents, so only load files you trust.
x_train, x_test, x_val = map(torch.load, (
    './data/google-RoBERTa.train.review.pt',
    './data/google-RoBERTa.test.review.pt',
    './data/google-RoBERTa.val.review.pt',
))

y_train, y_test, y_val = map(torch.load, (
    './data/google.train.rating.pt',
    './data/google.test.rating.pt',
    './data/google.val.rating.pt',
))


### TensorDataset、DataLoader
DataLoader：把 Dataset 包裝成可迭代的 mini-batch，方便 model 逐批處理。

In [14]:
import torch.utils.data as data_utils
# Pair each split's inputs with its targets so that indexing a dataset yields
# an (x, y) tuple.
train_dataset, test_dataset, val_dataset = (
    data_utils.TensorDataset(features, targets)
    for features, targets in (
        (x_train, y_train),
        (x_test, y_test),
        (x_val, y_val),
    )
)

In [15]:
# Training hyperparameters.
batch_size, epochs = 32, 50
learning_rate = 0.0001
input_dim = 768  # passed to NN as the width of its first Linear layer

# Only the training split is reshuffled; test and val keep their stored order.
# drop_last is left at its default, so the final batch of a split may be short.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=batch_size)
val_loader = DataLoader(dataset=val_dataset, shuffle=False, batch_size=batch_size)

### 構建神經元網絡

In [16]:
class NN(torch.nn.Module):
    """Fully connected network: rv_input_n -> 2048 -> 1024 -> rv_output_dim.

    Each hidden Linear layer is followed by ReLU and Dropout(0.5); the final
    Linear layer has no activation.

    Args:
        rv_input_n: Number of input features per sample after flattening.
        rv_output_dim: Number of output values per sample.
    """

    def __init__(self, rv_input_n, rv_output_dim):
        super().__init__()

        first_width, second_width, drop_p = 2048, 1024, 0.5
        stack = [
            torch.nn.Linear(rv_input_n, first_width),
            torch.nn.ReLU(),
            torch.nn.Dropout(drop_p),
            torch.nn.Linear(first_width, second_width),
            torch.nn.ReLU(),
            torch.nn.Dropout(drop_p),
            torch.nn.Linear(second_width, rv_output_dim),
        ]
        # The attribute name `dnn` and the layer positions define the
        # state_dict keys (dnn.0.*, dnn.3.*, dnn.6.*); keep both stable so
        # previously saved checkpoints still load.
        self.dnn = torch.nn.Sequential(*stack)

    def forward(self, x):
        """Flatten every sample to one feature vector and run the stack."""
        n_samples = x.shape[0]
        return self.dnn(x.view(n_samples, -1))

### build model

In [17]:
def count_parameters(model):
    """Print a table of the model's trainable parameters and return their total.

    Parameters with requires_grad == False are left out of both the table and
    the total.

    Args:
        model: Object exposing named_parameters() (e.g. a torch.nn.Module).

    Returns:
        Total number of elements across all trainable parameters.
    """
    sizes = [
        (param_name, tensor.numel())
        for param_name, tensor in model.named_parameters()
        if tensor.requires_grad
    ]

    summary = PrettyTable(["Modules", "Parameters"])
    for param_name, size in sizes:
        summary.add_row([param_name, size])

    total_params = sum(size for _, size in sizes)
    print(summary)
    print(f"Total Trainable Params: {total_params}")
    return total_params

# Single-output network placed on the selected device, optimised with Adam.
nn_model = NN(rv_input_n=input_dim, rv_output_dim=1).to(device)
optimizer = optim.Adam(
    nn_model.parameters(),
    lr=learning_rate,
    betas=(0.9, 0.999),
    eps=1e-08,
    weight_decay=0,
)

In [18]:
# Prints the per-parameter table; the returned total is shown as the cell output.
count_parameters(nn_model)

+--------------+------------+
|   Modules    | Parameters |
+--------------+------------+
| dnn.0.weight |  1572864   |
|  dnn.0.bias  |    2048    |
| dnn.3.weight |  2097152   |
|  dnn.3.bias  |    1024    |
| dnn.6.weight |    1024    |
|  dnn.6.bias  |     1      |
+--------------+------------+
Total Trainable Params: 3674113


3674113

### train

In [None]:
import os  # only needed here, to create the checkpoint directory

loss_f = torch.nn.MSELoss()
# Start at +inf so the first epoch always counts as an improvement, whatever
# the scale of the loss.
min_val_loss = float('inf')

# Create the checkpoint directory up front: torch.save does not create missing
# directories, and failing after a full training epoch wastes the epoch.
os.makedirs('./model_1rv', exist_ok=True)

for epoch in range(epochs):
    train_loss_sum = 0.0
    val_loss_sum = 0.0
    start_time = timer()

    # ---- training pass ----
    nn_model.train()
    # Batch variables use their own names so the full x_train / y_train /
    # x_val / y_val tensors loaded earlier are not overwritten by a batch.
    for x_batch, y_batch in train_loader:

        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        output = nn_model(x_batch)
        # NOTE(review): assumes y_batch has the same shape as output; a target
        # of a different but broadcastable shape would be broadcast by MSELoss
        # and give a wrong loss -- confirm the stored rating tensor shape.
        loss = loss_f(output, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # loss is the mean over this batch; weight it by the batch size so the
        # epoch figure is a mean over samples even when the last batch is short.
        train_loss_sum += loss.item() * x_batch.size(0)

    train_loss = train_loss_sum / len(train_loader.dataset)
    # Keep the fractional seconds; the log line prints three decimals.
    train_time = timer() - start_time

    # ---- validation pass ----
    nn_model.eval()
    # No gradients are needed for validation; skip building the autograd graph.
    with torch.no_grad():
        for x_batch, y_batch in val_loader:

            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            val_output = nn_model(x_batch)
            loss = loss_f(val_output, y_batch)
            val_loss_sum += loss.item() * x_batch.size(0)

    val_loss = val_loss_sum / len(val_loader.dataset)

    # Save the weights whenever the validation loss beats the best seen so far.
    if val_loss < min_val_loss:
        min_val_loss = val_loss

        # Change this path to store checkpoints somewhere else.
        model_out_file = './model_1rv/RV1-RoBERTa-2048-1024-valMSE_{:.4f}-batch_size_{}-lr_{}-epoch_{}.model'.format(
                    val_loss,
                    batch_size,
                    learning_rate,
                    epoch+1)
        torch.save(nn_model.state_dict(), model_out_file)

    log = f"[Epoch:{epoch+1}] Train MSE: {train_loss:.4f} Val MSE: {val_loss:.4f} Epoch train time = {train_time:.3f}s "
    print(log)

#### 以下為測試（test）：若要改跑其他模型或資料檔，請先更改對應的檔案路徑

In [19]:
# Checkpoint to evaluate; edit this path to test a different saved model.
checkpoint_path = './model_1rv/RV1-RoBERTa-2048-1024-valMSE_0.7379-batch_size_32-lr_0.0001-epoch_50.model'

# Rebuild the architecture and restore the saved weights. map_location remaps
# the stored tensors onto the current device, so a checkpoint written on a GPU
# machine also loads on a CPU-only one.
nn_model = NN(input_dim,1).to(device)
nn_model.load_state_dict(torch.load(checkpoint_path, map_location=device))
nn_model.eval()
loss_f = torch.nn.MSELoss()
test_loss_sum = 0.0

# Per-batch results are collected in lists and joined once after the loop,
# instead of re-copying a growing array on every batch.
pred_chunks = []
true_chunks = []

# Inference only: no autograd graph is needed.
with torch.no_grad():
    # Batch variables use their own names so the full x_test / y_test tensors
    # loaded earlier are not overwritten by the last batch.
    for x_batch, y_batch in test_loader:

        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        y_hat = nn_model(x_batch)
        loss = loss_f(y_hat, y_batch)
        # loss is the mean over this batch; weight it by the batch size so the
        # reported figure is a mean over samples even with a short last batch.
        test_loss_sum += loss.item() * x_batch.size(0)

        pred_chunks.append(y_hat.reshape(-1).cpu().numpy())
        true_chunks.append(y_batch.reshape(-1).cpu().numpy())

# float64 matches the dtype produced by the previous accumulation, which
# started from an empty float64 array.
y_pred_np = np.concatenate(pred_chunks).astype(np.float64)
y_true_np = np.concatenate(true_chunks).astype(np.float64)

test_loss = test_loss_sum / len(test_loader.dataset)
log = f"Test MSE loss: {test_loss:.4f}  "
print(log)

Test MSE loss: 0.7280  


In [20]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.utils import check_array


def mae_value(y_true_np, y_pred_np):
    """Return the mean absolute error between targets and predictions."""
    return mean_absolute_error(y_true_np, y_pred_np)


def mean_absolute_percentage_error(y_true_np, y_pred_np):
    """Return MAPE in percent: mean(|(y_true - y_pred) / y_true|) * 100.

    A target equal to zero makes the ratio undefined; numpy then yields
    inf/nan (with a RuntimeWarning) rather than raising.
    """
    y_true_np, y_pred_np = np.array(y_true_np), np.array(y_pred_np)
    return np.mean(np.abs((y_true_np - y_pred_np) / y_true_np)) * 100


mse = mean_squared_error(y_true_np, y_pred_np)
# Reuse the MSE computed above instead of evaluating it a second time.
rmse = np.sqrt(mse)

print('MSE:',round(mse,4))
print('RMSE:',round(rmse,4))

# Results get their own names so the MAE / MAPE metric classes imported from
# pytorch_forecasting are not overwritten by plain floats.
mae_score = mae_value(y_true_np,y_pred_np)
print('MAE:',round(mae_score,4))
mape_score = mean_absolute_percentage_error(y_true_np,y_pred_np)
print('MAPE:',round(mape_score,4))

MSE: 0.728
RMSE: 0.8532
MAE: 0.5714
MAPE: 29.3258
