In [1]:
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
import torch.nn.functional as F
from torch import optim
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm
from timeit import default_timer as timer
import torch.nn as nn
import pytorch_lightning as pl
from pytorch_forecasting import RMSE,MAE,MAPE
from prettytable import PrettyTable

### Loading data
appId/userId/rating : int32

In [2]:
# Every split is stored on disk as four tensors, loaded here in the fixed order
# (review, userId, appId, rating).
# NOTE(review): the review tensors presumably hold the review-text features fed to
# the review network — confirm against the preprocessing notebook.
train_rv, train_userId, train_appId, train_rating = [
    torch.load(path)
    for path in (
        './data/google-RB.train.review.pt',
        './data/google.train.userId.pt',
        './data/google.train.appId.pt',
        './data/google.train.rating.pt',
    )
]

val_rv, val_userId, val_appId, val_rating = [
    torch.load(path)
    for path in (
        './data/google-RB.val.review.pt',
        './data/google.val.userId.pt',
        './data/google.val.appId.pt',
        './data/google.val.rating.pt',
    )
]

test_rv, test_userId, test_appId, test_rating = [
    torch.load(path)
    for path in (
        './data/google-RB.test.review.pt',
        './data/google.test.userId.pt',
        './data/google.test.appId.pt',
        './data/google.test.rating.pt',
    )
]


In [3]:
# Restore the id / rating tensors to flat 1-D vectors so that one integer index
# addresses one sample.
train_userId, train_appId, train_rating = [
    t.view(-1) for t in (train_userId, train_appId, train_rating)
]

val_userId, val_appId, val_rating = [
    t.view(-1) for t in (val_userId, val_appId, val_rating)
]

test_userId, test_appId, test_rating = [
    t.view(-1) for t in (test_userId, test_appId, test_rating)
]

In [4]:
# get_user_item_matrix_indices
# Build the (user, item, rating) triples of the training interactions as NumPy arrays.
# The tensors are converted in one shot instead of being appended element by element
# in a Python loop, which is very slow on a large interaction log and left
# `item_indices` bound to a list of 0-d tensors.
# `.copy()` keeps these arrays independent of the tensors handed to the Dataset.
user_indices = train_userId.detach().cpu().numpy().copy()
item_indices = train_appId.detach().cpu().numpy().copy()
ratings = train_rating.detach().cpu().numpy().copy()

user_item_rating_indices = [user_indices, item_indices, ratings]
rating_data = ratings

# Backward-compatible alias: this misspelled name was the only one previously bound
# to the item array, so keep it available for any cell that still refers to it.
item_incides = item_indices

### 模型參數

In [5]:
# --- optimisation settings ---
batch_size = 32
epochs = 50
learning_rate = 0.0001

# --- rating network ---
# `layers`: widths of the user tower and of the item tower. layers[0] is the output
# width of each tower's first linear layer; the last entry is the tower output width.
layers = [128, 64]
# `layers_cat`: widths of the dense stack applied to the concatenated user/item
# vectors (e.g. 512/256, 256/128, 128/64, ...).
layers_cat = [512, 256, 64]

# --- review network ---
# Size of one flattened review input.
# NOTE(review): presumably two 768-d text embeddings per sample — confirm.
rv_input_dim = 2 * 768
# Output width of the review network.
rv_output_dim = 64

### Dataset、DataLoader
DataLoader：把 Dataset 物件包裝成可依 batch 迭代（並可選擇是否 shuffle）的資料來源，方便 model 逐批處理

In [6]:
from torch.utils.data import DataLoader, Dataset
class UserItemRatingDataset(Dataset):
    """Index-aligned dataset of (user id, item id, rating target, review input).

    Args:
        user: indexable collection of user ids, one entry per sample.
        item: indexable collection of item ids, one entry per sample.
        target: indexable collection of rating targets, one entry per sample.
        review_3: indexable collection of review inputs, one entry per sample.

    Raises:
        ValueError: if the four inputs do not contain the same number of samples.
            Misaligned inputs would otherwise pair ids with the wrong rating or
            review, or fail with an IndexError only part-way through an epoch.
    """

    def __init__(self, user, item, target, review_3):
        lengths = (len(user), len(item), len(target), len(review_3))
        if len(set(lengths)) != 1:
            raise ValueError(
                "user, item, target and review_3 must have the same length, "
                "got {}".format(lengths)
            )
        self.user = user
        self.item = item
        self.target = target
        self.review_3 = review_3

    def __getitem__(self, index):
        """Return the (user, item, target, review) tuple stored at `index`."""
        return self.user[index], self.item[index], self.target[index], self.review_3[index]

    def __len__(self):
        """Number of samples; `len()` works for tensors, arrays and lists alike."""
        return len(self.user)

In [7]:
# One Dataset object plus one DataLoader per split.
# The train and validation loaders reshuffle on every pass; the test loader keeps
# the stored sample order.
train_dataset = UserItemRatingDataset(
    user=train_userId, item=train_appId, target=train_rating, review_3=train_rv
)
test_dataset = UserItemRatingDataset(
    user=test_userId, item=test_appId, target=test_rating, review_3=test_rv
)
val_dataset = UserItemRatingDataset(
    user=val_userId, item=val_appId, target=val_rating, review_3=val_rv
)

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=True)

### 構建神經元網絡

In [8]:
class RARV2_model(torch.nn.Module):
    """Rating predictor that joins a review network (RV2) with a rating-matrix network (RA).

    RV2 is an MLP over the flattened review input. RA looks up the user's row and the
    item's column of a dense user-item rating matrix, passes each through its own
    tower, concatenates the two tower outputs and runs them through a further dense
    stack. The RA and RV2 outputs are concatenated and mapped to one scalar per sample.

    Args:
        num_users: number of rows of the user-item rating matrix.
        num_items: number of columns of the user-item rating matrix.
        layers: widths of the user tower and of the item tower (same for both).
        layers_cat: widths of the dense stack applied to the concatenated tower outputs.
        rv_input_dim: size of one flattened review input.
        rv_output_dim: output width of the review network.
        interactions: optional ``(user_ids, item_ids, ratings)`` triple used to fill
            the rating matrix. ``user_ids`` / ``item_ids`` are sequences of ints or
            NumPy arrays; ``ratings`` is array-like. When omitted, the notebook
            globals ``user_indices``, ``item_indices`` and ``train_rating`` are used,
            which is what this class always did before the argument existed.

    Notes:
        * The rating matrix is materialised densely (num_users x num_items) and is
          registered as a non-persistent buffer: it follows ``model.to(device)`` but
          is not written to ``state_dict()``, so checkpoint contents are unchanged.
        * The input widths of ``linear_concat_1`` and ``final`` are derived from
          ``layers`` / ``layers_cat`` / ``rv_output_dim`` instead of being fixed to
          128, so other layer configurations work as well.
    """

    def __init__(self, num_users, num_items, layers, layers_cat, rv_input_dim, rv_output_dim,
                 interactions=None):
        super().__init__()

        # RV2_model: MLP over the flattened review input.
        self.rv2_dnn = torch.nn.Sequential(
            torch.nn.Linear(rv_input_dim, 2048),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(2048, 1024),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(1024, rv_output_dim),
        )

        # RA_model
        self.num_users = num_users
        self.num_items = num_items
        self.latent_dim = layers[0]
        self.layers = layers
        self.latent_dim_concat = layers_cat[0]
        self.layers_cat = layers_cat
        self.user_str = 'torch.int64'  # not read anywhere in this class; kept as-is

        # Rating-matrix data: rows are user ids, columns are item ids.
        if interactions is None:
            # Legacy behaviour: take the training triples prepared by earlier cells.
            interactions = (user_indices, item_indices, train_rating)
        rows, cols, values = interactions
        self.user_item_indices = torch.LongTensor([rows, cols])
        self.rating_data = values
        dense_ratings = torch.sparse_coo_tensor(
            self.user_item_indices,
            self.rating_data,
            torch.Size((self.num_users, self.num_items)),
        ).to_dense()
        # Non-persistent buffer: moved by .to()/.cuda() together with the parameters,
        # excluded from state_dict().
        self.register_buffer('user_item_matrix', dense_ratings, persistent=False)

        # First layer of the user tower, the item tower and the concat stack is
        # created explicitly; the remaining layers follow from the width lists.
        # user Layer 1 to N (input: one matrix row, i.e. num_items values)
        self.linear_user_1 = nn.Linear(in_features=self.num_items, out_features=self.latent_dim)
        self.linear_user_1.weight.detach().normal_(0, 0.01)
        self.user_fc_layers = nn.ModuleList([
            nn.Linear(in_features=n_in, out_features=n_out)
            for n_in, n_out in zip(self.layers[:-1], self.layers[1:])
        ])

        # item Layer 1 to N (input: one matrix column, i.e. num_users values)
        self.linear_item_1 = nn.Linear(in_features=self.num_users, out_features=self.latent_dim)
        self.linear_item_1.weight.detach().normal_(0, 0.01)
        self.item_fc_layers = nn.ModuleList([
            nn.Linear(in_features=n_in, out_features=n_out)
            for n_in, n_out in zip(self.layers[:-1], self.layers[1:])
        ])

        # user & item concat layer: its input is the user tower output next to the
        # item tower output, each of width layers[-1].
        self.linear_concat_1 = nn.Linear(in_features=2 * self.layers[-1],
                                         out_features=self.latent_dim_concat)
        self.linear_concat_1.weight.detach().normal_(0, 0.01)
        self.concat_layers = nn.ModuleList([
            nn.Linear(in_features=n_in, out_features=n_out)
            for n_in, n_out in zip(self.layers_cat[:-1], self.layers_cat[1:])
        ])

        self.dropout_layer = torch.nn.Dropout(0.5)

        # Final regressor over [concat-stack output | review-network output].
        self.final = nn.Linear(self.layers_cat[-1] + rv_output_dim, 1)

    def forward(self, user_indices, item_indices, rv2, idx=None):
        """Predict one rating per sample.

        Args:
            user_indices: integer tensor of user ids, one per sample.
            item_indices: integer tensor of item ids, one per sample.
            rv2: review inputs; flattened to (batch, -1) before the review network.
            idx: unused; accepted so existing calls that pass a batch index keep working.

        Returns:
            1-D tensor with one predicted rating per sample.
        """
        # RV2_model
        rv2 = rv2.view(rv2.size(0), -1)
        rv2 = self.rv2_dnn(rv2)

        # RA_model: the user's row and the item's column of the rating matrix.
        user = self.user_item_matrix[user_indices]
        item = self.user_item_matrix[:, item_indices].t()

        user = self.linear_user_1(user)
        for layer in self.user_fc_layers:
            user = F.relu(user)
            user = self.dropout_layer(user)
            user = layer(user)

        item = self.linear_item_1(item)
        for layer in self.item_fc_layers:
            item = F.relu(item)
            item = self.dropout_layer(item)
            item = layer(item)

        # Concatenate the user and item vectors and feed them through the concat stack.
        concat_user_item = torch.cat((user, item), 1)
        concat_user_item = self.linear_concat_1(concat_user_item)
        for layer in self.concat_layers:
            concat_user_item = F.relu(concat_user_item)
            concat_user_item = self.dropout_layer(concat_user_item)
            concat_user_item = layer(concat_user_item)

        # Join RA_model and RV2_model outputs.
        concat_all = torch.cat((concat_user_item, rv2), 1)
        concat_all = self.final(concat_all)

        y_hat = concat_all.view(-1)

        return y_hat

In [9]:
def count_parameters(model):  # model parameter statistics
    """Print a table of trainable parameter counts and return their total.

    Parameters with ``requires_grad`` set to False are left out of both the table
    and the total.

    Args:
        model: object exposing ``named_parameters()``.

    Returns:
        Total number of elements over all trainable parameters.
    """
    table = PrettyTable(["Modules", "Parameters"])
    trainable = [
        (name, tensor.numel())
        for name, tensor in model.named_parameters()
        if tensor.requires_grad
    ]
    for name, count in trainable:
        table.add_row([name, count])
    total_params = sum(count for _, count in trainable)
    print(table)
    print(f"Total Trainable Params: {total_params}")
    return total_params


# Build the model and its optimizer.
rarv2_model = RARV2_model(
    num_users=135331,
    num_items=9095,
    layers=layers,
    layers_cat=layers_cat,
    rv_input_dim=rv_input_dim,
    rv_output_dim=rv_output_dim,
).to(device)
optimizer = torch.optim.Adam(params=rarv2_model.parameters(), lr=learning_rate)
# Last expression of the cell: show the module layout.
rarv2_model

RARV2_model(
  (rv2_dnn): Sequential(
    (0): Linear(in_features=1536, out_features=2048, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=2048, out_features=1024, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=1024, out_features=64, bias=True)
  )
  (linear_user_1): Linear(in_features=9095, out_features=128, bias=True)
  (user_fc_layers): ModuleList(
    (0): Linear(in_features=128, out_features=64, bias=True)
  )
  (linear_item_1): Linear(in_features=135331, out_features=128, bias=True)
  (item_fc_layers): ModuleList(
    (0): Linear(in_features=128, out_features=64, bias=True)
  )
  (linear_concat_1): Linear(in_features=128, out_features=512, bias=True)
  (concat_layers): ModuleList(
    (0): Linear(in_features=512, out_features=256, bias=True)
    (1): Linear(in_features=256, out_features=64, bias=True)
  )
  (dropout_layer): Dropout(p=0.5, inplace=False)
  (final): Linear(in_features=128, 

In [10]:
# count_parameters(rarv2_model) # 模型參數統計

### train

In [None]:
import os

loss_f = torch.nn.MSELoss()
# Start from +inf so the first epoch always yields a checkpoint, whatever the loss scale.
min_val_loss = float('inf')

for epoch in range(epochs):
    train_loss_sum = 0.0
    val_loss_sum = 0.0

    start_time = timer()

    # train model
    rarv2_model.train()
    for idx, (user, item, y, rv2) in enumerate(train_loader):
        user = user.long()
        item = item.long()
        user, item, y, rv2 = user.to(device), item.to(device), y.to(device), rv2.to(device)
        y_hat = rarv2_model(user, item, rv2, idx)
        loss = loss_f(y_hat, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss_sum += loss.item()

    # Mean of the per-batch losses.
    train_loss = train_loss_sum / len(train_loader)
    train_time = round(timer() - start_time)

    # val
    rarv2_model.eval()
    # Validation never back-propagates, so skip building the autograd graph.
    with torch.no_grad():
        for idx, (user, item, y, rv2) in enumerate(val_loader):
            user = user.long()
            item = item.long()
            user, item, y, rv2 = user.to(device), item.to(device), y.to(device), rv2.to(device)
            y_hat = rarv2_model(user, item, rv2, idx)
            val_loss_sum += loss_f(y_hat, y).item()

    val_loss = val_loss_sum / len(val_loader)

    # Save the model whenever the validation loss beats the best one seen so far.
    if val_loss < min_val_loss:
        min_val_loss = val_loss
        # Change the path here to store checkpoints somewhere else.
        model_out_file = './model_2rv_ra/RARV2-rv-2048-1024-64-ra-128-64-512-256-64-valMSE_{:.4f}-batch_size_{}-lr_{}-epoch_{}.model'.format(
                    val_loss,
                    batch_size,
                    learning_rate,
                    epoch+1)
        # torch.save does not create missing directories.
        os.makedirs(os.path.dirname(model_out_file), exist_ok=True)
        torch.save(rarv2_model.state_dict(), model_out_file)

    log = f"[Epoch:{epoch+1}] Train MSE: {train_loss:.4f} Val MSE: {val_loss:.4f} Epoch train time = {train_time:.3f}s "
    print(log)

以下為 test：若要改跑其他模型或檔案，需更改下方載入的檔案位置

In [11]:
# load model
rarv2_model = RARV2_model(135331,9095, layers, layers_cat, rv_input_dim, rv_output_dim).to(device)
# map_location lets a checkpoint saved on a GPU be restored on whatever `device` is
# in use now (including CPU-only machines).
rarv2_model.load_state_dict(torch.load('./model_2rv_ra/RARV2-rv-2048-1024-64-ra-128-64-512-256-64-valMSE_0.6140-batch_size_32-lr_0.0001-epoch_50.model', map_location=device))
rarv2_model.eval()
loss_f = torch.nn.MSELoss()
test_loss_sum = 0.0

# Per-batch results are collected in lists and joined once after the loop; growing
# an array with np.concatenate on every batch copies all earlier results each time.
# The leading empty float64 array keeps the joined arrays float64 and lets the join
# succeed even when the loader yields no batch.
pred_chunks = [np.array([])]
true_chunks = [np.array([])]

# Inference only: no autograd graph is needed.
with torch.no_grad():
    for idx, (user, item, y, rv2) in enumerate(test_loader):
        user = user.long()
        item = item.long()
        user, item, y, rv2 = user.to(device), item.to(device), y.to(device), rv2.to(device)
        y_hat = rarv2_model(user, item, rv2, idx)
        test_loss = loss_f(y_hat, y)
        test_loss_sum += test_loss.item()
        pred_chunks.append(y_hat.cpu().numpy())
        true_chunks.append(y.cpu().numpy())

y_pred_np = np.concatenate(pred_chunks)
y_true_np = np.concatenate(true_chunks)

# Mean of the per-batch losses.
test_loss = test_loss_sum / len(test_loader)
log = f"Test MSE loss: {test_loss:.4f}  "
print(log)

Test MSE loss: 0.6273  


In [12]:
from sklearn.metrics import mean_squared_error
# MSE over all collected test predictions; RMSE is its square root.
mse = mean_squared_error(y_true_np, y_pred_np)
rmse = np.sqrt(mse)

print('MSE:', round(mse, 4))
print('RMSE:', round(rmse, 4))

from sklearn.metrics import mean_absolute_error
#MAE
def mae_value(y_true_np, y_pred_np):
    """Return the mean absolute error of the predictions.

    Thin wrapper that forwards both arguments, in this order, to
    ``mean_absolute_error``.
    """
    return mean_absolute_error(y_true_np, y_pred_np)
#MAPE
from sklearn.utils import check_array
def mean_absolute_percentage_error(y_true_np, y_pred_np):
    """Mean absolute percentage error, in percent.

    Computes ``mean(|(y_true - y_pred) / y_true|) * 100``. The ratio is undefined
    for targets equal to zero, which would otherwise turn the whole result into
    inf/NaN; those entries are skipped and a warning is emitted. With no zero
    target the result is the plain formula over all entries.

    Args:
        y_true_np: array-like of ground-truth values.
        y_pred_np: array-like of predictions, broadcastable against ``y_true_np``.

    Returns:
        The MAPE in percent, or NaN when there is no entry with a non-zero target
        (including empty input).
    """
    import warnings

    y_true_np, y_pred_np = np.asarray(y_true_np), np.asarray(y_pred_np)
    diff = y_true_np - y_pred_np
    # Bring the targets to the broadcast shape of `diff` so one mask indexes both.
    denom = np.broadcast_to(y_true_np, diff.shape)
    usable = denom != 0
    if not np.all(usable):
        warnings.warn("MAPE: entries whose true value is 0 are skipped")
    if not np.any(usable):
        return np.nan
    mape = np.mean(np.abs(diff[usable] / denom[usable])) * 100
    return mape

# NOTE(review): the result names MAE / MAPE rebind the classes of the same name
# imported from pytorch_forecasting at the top of the notebook; they are kept so
# that anything reading these names afterwards still finds the numeric results.
MAE = mae_value(y_true_np, y_pred_np)
print('MAE:', round(MAE, 4))

MAPE = mean_absolute_percentage_error(y_true_np, y_pred_np)
print('MAPE:', round(MAPE, 4))

MSE: 0.6273
RMSE: 0.792
MAE: 0.5274
MAPE: 26.5673
