In [2]:
import numpy as np
data = np.load("non-text-feature.npz")

In [3]:
list(data.keys())

['indices_train',
 'indices_val',
 'indices_test',
 'userID',
 'userID_onehot',
 'itemID',
 'itemID_onehot',
 'verified',
 'review_length',
 'rating',
 'days',
 'weekdays',
 'month',
 'vote']

In [32]:
indices_train = data['indices_train']
indices_val = data['indices_val']
indices_test = data['indices_test']

userID=data['userID']
itemID=data['itemID']
vote=np.log(data['vote']+1)

num_user = np.max(userID)+1
num_item = np.max(itemID)+1

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
# Mapping user and game IDs to indices
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [27]:
class MyModel(nn.Module):
    def __init__(self, num_user, num_item, lambda_beta=0.01):
        super(MyModel,  self).__init__()
        self.lambda_beta = torch.scalar_tensor(lambda_beta).to(device)

        # init parameters
        self.alpha = nn.Parameter(torch.randn(1))
        self.beta_user = nn.Parameter(torch.randn(num_user, 1))
        self.beta_item = nn.Parameter(torch.randn(num_item, 1))

        self.to(device)

    def forward(self, userID,  gameID):
        return (self.alpha + self.beta_user[userID] + self.beta_item[gameID]).squeeze()
    
    def fit(self, train, val,  lr = 0.001,  num_epochs=3000, patience = 5):
        userID_train = torch.tensor(train[0], device=device)
        itemID_train = torch.tensor(train[1], device=device)
        votes_train  = torch.tensor(train[2], device=device, dtype=torch.float)
        userID_val = torch.tensor(val[0], device=device)
        itemID_val = torch.tensor(val[1], device=device)
        votes_val  = torch.tensor(val[2], device=device, dtype=torch.float)

        optimizer = optim.Adam(self.parameters(),  lr=lr)
        criterion = nn.MSELoss()

        #early stop
        best_val_loss = float('inf')
        epochs_no_improve = 0

        # train loop
        bar = tqdm(range(num_epochs))
        for epoch in bar:
            optimizer.zero_grad()
            predictions = self(userID_train,  itemID_train)
            # Compute loss
            loss = criterion(predictions,  votes_train)
            loss +=  self.lambda_beta*(self.beta_user.norm(2) + self.beta_item.norm(2))
            # backworks
            loss.backward()
            optimizer.step()
            # Early stopping check
            with torch.no_grad():
                val_preds = self(userID_val, itemID_val) # ignore cold start
                val_loss = criterion(val_preds,  votes_val)
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                epochs_no_improve = 0
            else:
                epochs_no_improve += 1
            if epochs_no_improve == patience:
                print(f'Early stopping triggered after {epoch + 1} epochs')
                break
            bar.set_postfix({"Epoch":epoch,  "loss": loss.item(),  "val_Loss": val_loss.item()})
        
        return best_val_loss

In [30]:
model = MyModel(num_user, num_item)

model.fit(
    [userID[indices_train], itemID[indices_train], vote[indices_train]], 
    [userID[indices_val], itemID[indices_val], vote[indices_val]],
    lr =0.01, num_epochs=500
)

  0%|          | 0/500 [00:00<?, ?it/s, Epoch=0, loss=11.7, val_Loss=4.08]

 48%|████▊     | 238/500 [00:00<00:00, 403.34it/s, Epoch=237, loss=1.06, val_Loss=0.807]

Early stopping triggered after 239 epochs





tensor(0.8071, device='cuda:0')

In [37]:
from sklearn.metrics import accuracy_score, mean_squared_error
prediction_test = model(userID[indices_test], itemID[indices_test]).cpu().detach().numpy()
mean_squared_error(vote[indices_test], prediction_test)

0.8123578005226919