In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import math
import time
from sklearn.preprocessing import MinMaxScaler


In [5]:
class FM(nn.Module):
    def __init__(self, n, k):
        super(FM, self).__init__()
        self.n = n
        self.k = k
        self.Feature_Matrix = nn.Parameter(torch.randn(n, k), requires_grad=True)
        self.linear = nn.Linear(n, 1)

    def forward(self, x):
        linear_Layer = self.linear(x)

        p1 = torch.matmul(x, self.Feature_Matrix).pow(2)
        p2 = torch.matmul(x.pow(2), self.Feature_Matrix.pow(2))

        interaction_layer = 0.5 * (p1 - p2).sum(dim=1, keepdim=True)

        return linear_Layer + interaction_layer

model = FM(5,5)  
model.load_state_dict(torch.load('./model/model_v2.pth'))

<All keys matched successfully>

In [6]:
class MovieLens(Dataset):
    def __init__(self, feature, label=None):
        self.feature = feature
        self.label = label

    def __len__(self):
        return len(self.feature)

    def __getitem__(self, idx):
        x = self.feature[idx]
        y = self.label[idx]
        return x, y
    
def data_tensor(dataset):

    # scaler = MinMaxScaler()

    label = torch.tensor(dataset['rating'].values).float()
    dataset.drop(columns=['rating'], inplace=True)
    # features = scaler.fit_transform(dataset.values)
    # features = torch.tensor(features).float()

    features = torch.tensor(dataset.values).float()
    # print(features,label)

    return MovieLens(features, label)

In [7]:
batch_size = 4096 # Needed to be modified!

test_set_path = {"initial":'./data/test_set.csv',
                "full": '',
                "sampled":"./data/bpr_sampled_test_set.csv" }

test_set = pd.read_csv(test_set_path['initial'], header=0)

positive_test_set = test_set[test_set['rating']==1]

# test_set = data_tensor(test_set)
# test_dataloader = DataLoader(test_set, batch_size=batch_size, shuffle=True) 


In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

K = 20
def full_ranking(model, test_dataloader):
    ranked_user_movies = []
    model.eval()
    for feature, label in test_dataloader:
        feature, label = feature.to(device), label.to(device).unsqueeze(1)
        with torch.no_grad():
            movieId = feature[:,3] # to select the movieId
            print("moviedId are: ", movieId)
            print("num of the movies: ", len(movieId))
            y_hat = model(feature.to(device))
            print("score after predicting: ", y_hat)
            y  = [i[0] for i in y_hat]
            ranked_indices = np.argsort(y)[::-1]
            print("ranked index: ",ranked_indices)

            ranked_movie = [movieId[i] for i in ranked_indices][:K]
            print("ranked movie id: ", ranked_movie)
            # ranked_user_movies.append(ranked_movie)  
    return ranked_movie

In [13]:
def feature_load(user, movies_id):
    user_features = pd.read_csv('./data/user_features.csv', header=0)
    item_features = pd.read_csv('./data/item_features.csv', header=0)
    
    user_feature = user_features[user_features['userId']==user]
    userId = int(user_feature['userId'])
    age = int(user_feature['age'])
    gender = int(user_feature['gender'])

    filtered_item = item_features[item_features['movieId'].isin(movies_id)]
    filtered_item.loc[:, 'userId'] = userId
    filtered_item.loc[:, 'age'] = age
    filtered_item.loc[:, 'gender'] = gender
    filtered_item.loc[:, 'rating'] = 1

    new_order = ['userId', 'age', 'gender', 'movieId', 'category','rating']
    filtered_item = filtered_item[new_order]

    return filtered_item
        

In [19]:
def recall(ranked_list, ground_truth):
    return len(set(ranked_list) & set(ground_truth)), len(ground_truth), len(set(ranked_list) & set(ground_truth))/len(ground_truth)

def ndcg(ranked_list, ground_truth):
    dcg = 0.0
    idcg = 0.0
    for idx, movie in enumerate(ranked_list):
        revised_idx = idx+1
        if movie in ground_truth:
            dcg += 1.0 / np.log2(revised_idx + 1)
        idcg += 1.0 / np.log2(revised_idx + 1)
    return dcg, idcg, dcg / idcg


In [24]:

def evaluate(model, test_set):
    users_id = test_set['userId'].unique()
    movies_id = test_set['movieId'].unique()
    total_numerator = 0
    total_denominator = 0
    total_dcg = 0
    total_idcg= 0

    for user in users_id:
        user_data  = feature_load(user, movies_id)
        user_data = data_tensor(user_data)
        grouped_test_set = test_set.groupby('userId')
        # print(user_data)

        test_dataloader = DataLoader(user_data, batch_size=batch_size, shuffle=True) 
        ranked_list = full_ranking(model,test_dataloader)
        ranked_list =  [int(tensor.item()) for tensor in ranked_list]
        ground_truth = grouped_test_set.get_group(user)['movieId'].tolist()

        numerator, denominator, user_recall = recall(ranked_list, ground_truth)
        total_numerator+=numerator
        total_denominator+=denominator        
        dcg, idcg, user_ndcg = ndcg(ranked_list, ground_truth)
        total_dcg+=dcg        
        total_idcg+=idcg
        print("recall of user "+ str(user) + " is :" + str(user_recall))
        print("ndcg of user "+ str(user) + " is :" + str(user_ndcg))
        break
    


In [25]:
evaluate(model, positive_test_set)

moviedId are:  tensor([1458., 1565.,  343.,  ...,   21.,  752., 3762.])
num of the movies:  3064
score after predicting:  tensor([[ 11446.4648],
        [   193.2577],
        [  8966.9082],
        ...,
        [   563.0009],
        [ 14516.1348],
        [112146.3984]])
ranked index:  [1826 1869  202 ... 2748 2078 2921]
ranked movie id:  [tensor(3877.), tensor(3867.), tensor(3864.), tensor(3787.), tensor(3779.), tensor(3765.), tensor(3737.), tensor(3808.), tensor(3793.), tensor(3723.), tensor(3729.), tensor(3728.), tensor(3711.), tensor(3710.), tensor(3659.), tensor(3727.), tensor(3613.), tensor(3612.), tensor(3617.), tensor(3601.)]
recall of user 0 is :0.0
ndcg of user 0 is :0.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_item.loc[:, 'userId'] = userId
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_item.loc[:, 'age'] = age
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_item.loc[:, 'gender'] = gender
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_index