In [8]:
!pip install torch-sparse torch-scatter -f https://data.pyg.org/whl/torch-2.0.0%2Bcu117.html

Looking in links: https://data.pyg.org/whl/torch-2.0.0%2Bcu117.html
Collecting torch-scatter
  Using cached https://data.pyg.org/whl/torch-2.0.0%2Bcu117/torch_scatter-2.1.1%2Bpt20cu117-cp38-cp38-win_amd64.whl (3.6 MB)
Installing collected packages: torch-scatter
Successfully installed torch-scatter-2.1.1+pt20cu117


In [37]:
!pip install pandas matplotlib tensorboard

Collecting tensorboard
  Using cached tensorboard-2.12.0-py3-none-any.whl (5.6 MB)
Collecting markdown>=2.6.8
  Using cached Markdown-3.4.1-py3-none-any.whl (93 kB)
Collecting google-auth-oauthlib<0.5,>=0.4.1
  Using cached google_auth_oauthlib-0.4.6-py2.py3-none-any.whl (18 kB)
Collecting werkzeug>=1.0.1
  Using cached Werkzeug-2.2.3-py3-none-any.whl (233 kB)
Collecting absl-py>=0.4
  Using cached absl_py-1.4.0-py3-none-any.whl (126 kB)
Collecting grpcio>=1.48.2
  Downloading grpcio-1.51.3-cp38-cp38-win_amd64.whl (3.7 MB)
     ---------------------------------------- 3.7/3.7 MB 12.0 MB/s eta 0:00:00
Collecting google-auth<3,>=1.6.3
  Downloading google_auth-2.16.2-py2.py3-none-any.whl (177 kB)
     ---------------------------------------- 177.2/177.2 kB ? eta 0:00:00
Collecting protobuf>=3.19.6
  Downloading protobuf-4.22.1-cp38-cp38-win_amd64.whl (420 kB)
     ------------------------------------- 420.6/420.6 kB 13.2 MB/s eta 0:00:00
Collecting tensorboard-plugin-wit>=1.6.0
  Using c

In [11]:
from torch_geometric.nn.models.lightgcn import LightGCN
import pandas as pd
import os
from tqdm import tqdm
import torch
import numpy as np

## Load Data
We can begin by loading in the user review data. For each user, we have a subset of the movies that they reviewed. We'll load each of the CSVs as dataframes, and store a dict of user IDs corresponding to their dataframes.

In [12]:
# for now we will use the first 10k rows of the data, set to None to use all data
AMOUNT_TO_LOAD = 500

In [13]:
user_reviews_dir = 'user_reviews'
user_review_data = dict()

for filename in tqdm(os.listdir(user_reviews_dir)):
    if AMOUNT_TO_LOAD is not None and len(user_review_data) >= AMOUNT_TO_LOAD:
        break
    try:
        user_review_data[filename] = pd.read_csv(os.path.join(user_reviews_dir, filename), encoding='unicode_escape')
    except pd.errors.EmptyDataError:
        print(f'Empty file: {filename}')
        pass

  1%|          | 335/63111 [00:01<06:10, 169.55it/s]

Empty file: 468889434_reviews.csv


  1%|          | 501/63111 [00:02<06:04, 171.70it/s]


Now let's split the data into training, validation, and test sets. Since this is a recommender, we're gonna split by removing some of the user's reviews.

For every user, so long as the user has more than 5 reviews, remove one review for the validation set and one review for the test set.

In [14]:
print(list(user_review_data.keys())[0])

0001kidd_reviews.csv


In [15]:
# remove all values with nan in the review column
for key in tqdm(user_review_data.keys()):
    user_review_data[key] = user_review_data[key].dropna(subset=['movie_rating'])

100%|██████████| 500/500 [00:00<00:00, 1140.44it/s]


In [16]:
train_reviews = []
validation_reviews = []
test_reviews = []
for user_id, reviews in tqdm(user_review_data.items()):
    if len(reviews) > 80:
        validation_review_data_df = reviews.sample(40, replace=False)
        validation_review_data = validation_review_data_df.to_dict('records')
        for review in validation_review_data:
            review['user_id'] = user_id
        validation_reviews.extend(validation_review_data)
        # remove the validation reviews from the training data
        reviews = reviews.drop(validation_review_data_df.index)
        test_review_data_df = reviews.sample(20, replace=False)
        test_review_data = test_review_data_df.to_dict('records')
        for review in test_review_data:
            review['user_id'] = user_id
        test_reviews.extend(test_review_data)
        # remove the test reviews from the training data
        reviews = reviews.drop(test_review_data_df.index)
        train_review_data = reviews.to_dict('records')
        for review in train_review_data:
            review['user_id'] = user_id
        train_reviews.extend(train_review_data)
    else:
        # if the user has less than 5 reviews, we will use all of them for training
        train_review_data = reviews.to_dict('records')
        for review in train_review_data:
            review['user_id'] = user_id
        train_reviews.extend(train_review_data)

print(f'Train reviews: {len(train_reviews)}')
print(f'Validation reviews: {len(validation_reviews)}')
print(f'Test reviews: {len(test_reviews)}')

100%|██████████| 500/500 [00:01<00:00, 369.99it/s]

Train reviews: 160650
Validation reviews: 14000
Test reviews: 7000





## Build the Model
Now that we have the training data, let's construct the model to train.

In [17]:
num_train_users = len(set([review['user_id'] for review in train_reviews]))
num_train_items = len(set([review['movie_id'] for review in train_reviews]))
num_total_items = len(set([review['movie_id'] for review in train_reviews + validation_reviews + test_reviews]))
num_nodes = num_train_users + num_total_items
print(f'Number of train users: {num_train_users}')
print(f'Number of train items: {num_train_items}')
print(f'Number of nodes: {num_nodes}')

Number of train users: 500
Number of train items: 28009
Number of nodes: 29592


In [18]:
num_val_users = len(set([review['user_id'] for review in validation_reviews]))
num_val_items = len(set([review['movie_id'] for review in validation_reviews]))
num_val_nodes = num_val_users + num_val_items

In [19]:
# Let's map users to ids
movie_id_to_movie_name = dict()
for review in train_reviews + validation_reviews + test_reviews:
    movie_id_to_movie_name[review['movie_id']] = review['movie_title']

user_to_id = dict()
for i, user_id in enumerate(set([review['user_id'] for review in train_reviews + validation_reviews + test_reviews])):
    user_to_id[user_id] = i

# Let's map movies to ids
movie_to_id = dict()
for i, movie_id in enumerate(set([review['movie_id'] for review in train_reviews + validation_reviews + test_reviews])):
    movie_to_id[movie_id] = i + num_train_users

# Let's map ids to users
id_to_user = dict()
for user_id, index in user_to_id.items():
    id_to_user[index] = user_id

# Let's map ids to movies
id_to_movie = dict()
for movie_id, index in movie_to_id.items():
    id_to_movie[index] = movie_id

# Let's map movie names to movie ids
movie_name_to_movie_id = dict()
for movie_id, movie_name in movie_id_to_movie_name.items():
    movie_name_to_movie_id[movie_name] = movie_id

In [20]:
import random

def convert_review_to_edge(review):
    user_id = user_to_id[review['user_id']]
    movie_id = movie_to_id[review['movie_id']]
    edge_weight = review['movie_rating']
    if (edge_weight < 3.5 and edge_weight > 2.5):
        return None, None
    edge = (user_id, movie_id)
    edge_weight = review['movie_rating']
    return edge, edge_weight

def shuffle_edges_and_edge_weights(edges, edge_weights):
    c = list(zip(edges, edge_weights))
    random.shuffle(c)
    return zip(*c)

def convert_reviews_to_edges(reviews):
    edges = []
    edge_weights = []
    for review in tqdm(reviews):
        edge, edge_weight = convert_review_to_edge(review)
        if edge is not None:
            edges.append(edge)
            edge_weights.append(edge_weight)
    
    # Reformat the edges to be a tensor
    edges = torch.tensor(edges, dtype=torch.long).t().contiguous()
    return edges, edge_weights

In [21]:
# Now let's create the edges between users and movies.
# The id of the user will be the index of the user in the user_to_id dict
# The id of the movie will be the index of the movie in the movie_to_id dict + the number of users

train_edges, train_edge_weights = convert_reviews_to_edges(train_reviews)
validation_edges, validation_edge_weights = convert_reviews_to_edges(validation_reviews)

print(f'Train edges: {train_edges.shape[1]}')
print(f'Validation edges: {validation_edges.shape[1]}')

100%|██████████| 160650/160650 [00:00<00:00, 927686.57it/s]
100%|██████████| 14000/14000 [00:00<00:00, 932467.18it/s]

Train edges: 130791
Validation edges: 11591





In [22]:
import torch_geometric.data as data

# create the graph
train_graph = data.Data(
    edge_index=train_edges,
    edge_attr=torch.tensor(train_edge_weights),
    num_nodes=num_nodes
)

validation_graph = data.Data(
    edge_index=validation_edges,
    edge_attr=torch.tensor(validation_edge_weights),
    num_nodes=num_nodes
)

In [23]:
train_graph.validate(raise_on_error=True)
validation_graph.validate(raise_on_error=True)

True

In [24]:
# Let's create some negative edges
def resample_edges_for_user(user_positive_edges, user_negative_edges):
    num_negative_edges_to_add = user_positive_edges.shape[1] * 3 - user_negative_edges.shape[1]
    if (num_negative_edges_to_add <= 0):
        num_negative_edges_to_remove = -num_negative_edges_to_add
        # choose the negative edges to keep
        negative_edges_to_keep = torch.randint(user_negative_edges.shape[1], (user_negative_edges.shape[1] - num_negative_edges_to_remove,))
        # remove all the negative edges for this user
        user_negative_edges = user_negative_edges[:, negative_edges_to_keep]
    else:
        # Create new negative edges
        negative_edges_to_add = torch.tensor([[user_id] * num_negative_edges_to_add, torch.randint(num_train_users, num_train_items, (num_negative_edges_to_add,))], dtype=torch.long)
        # Add the negative edges to the negative edges for this user
        user_negative_edges = torch.cat([user_negative_edges, negative_edges_to_add], dim=1)
    return user_positive_edges, user_negative_edges
        

In [98]:
# let's compute ndcg
def compute_ndcg_at_k(relevances, k=5):
    dcg = 0
    for i, relevance in enumerate(relevances):
        if i == k:
            break
        dcg += (relevance) / np.log2(i + 2)
    idcg = 0
    for i, relevance in enumerate(sorted(relevances, reverse=True)):
        if i == k:
            break
        idcg += (relevance) / np.log2(i + 2)
    return dcg / idcg

In [99]:
def get_user_positive_items(edge_index):
    """Generates dictionary of positive items for each user

    Args:
        edge_index (torch.Tensor): 2 by N list of edges

    Returns:
        dict: dictionary of positive items for each user
    """
    user_pos_items = {}
    for i in range(edge_index.shape[1]):
        user = edge_index[0][i].item()
        item = edge_index[1][i].item()
        if user not in user_pos_items:
            user_pos_items[user] = []
        user_pos_items[user].append(item)
    return user_pos_items

In [100]:
import time
def compute_recall_at_k(validation_graph, model, K):
    # get positive edges in validation set
    positive_edges = validation_graph.edge_index[:, validation_graph.edge_attr > 3.5]

    # map users to positive edges
    user_pos_items = get_user_positive_items(positive_edges)

    # get users
    users = positive_edges[0].unique()

    users = users[torch.randint(users.shape[0], (min(200, len(users)),))]
    # filter the validation edges to only the users we want to evaluate
    user_validation_edges = []
    for user in users:
        user_validation_edges.append(validation_graph.edge_index[:, validation_graph.edge_index[0] == user])
    user_validation_edges = torch.cat(user_validation_edges, dim=1)
    print(user_validation_edges.shape)

    first_user_id = users[0].item()
    user_name = id_to_user[first_user_id]
    print(f'User: {user_name}')

    # get movies
    movie_indices = torch.LongTensor([_ for _ in range(len(users) + 1, validation_graph.num_nodes)]).to(device)

    # Get positive items for each user in validation set
    truth_items = [set(user_pos_items[user.item()]) for user in users]

    first_user_truth_items = truth_items[0]
    first_user_truth_items = [id_to_movie[item] for item in first_user_truth_items]
    first_user_truth_items = [movie_id_to_movie_name[item] for item in first_user_truth_items]
    print(first_user_truth_items)

    training_edges = train_graph.edge_index

    # Get top-K recommended items for each user in validation set
    total_recall = 0
    print("Computing recommendations for {} users".format(len(users)))
    for user_index, user_id in tqdm(enumerate(users), total=len(users)):
        tick = time.time()
        all_edges = torch.tensor([(user_id, item_id) for item_id in range(num_train_users, num_train_items)], dtype=torch.long).t().contiguous()
        recommendations = model.recommend(all_edges.to(device), src_index=torch.tensor([user_id]).to(device), dst_index=torch.tensor([x for x in range(num_train_users + 1, num_train_items)]).to(device), k=10 * K)[0]
        tock = time.time()
        train_edges_for_user = training_edges[:, training_edges[0] == user_id].to(device)
        # remove all the recommendations that are in the training set
        recommendations = recommendations[~torch.isin(recommendations, train_edges_for_user[1])][:K]
        if (len(recommendations) < K):
            print("Not enough recommendations for user {}".format(user_id))
            continue
        if (user_id == first_user_id):
            first_user_recommended_items = recommendations
            first_user_recommended_items = [id_to_movie[item.item()] for item in first_user_recommended_items if item.item() > num_train_users]
            first_user_recommended_items = [movie_id_to_movie_name[item] for item in first_user_recommended_items if item in movie_id_to_movie_name]
            print(first_user_recommended_items)
        # num_intersect = 0
        truth_items_for_user = truth_items[user_index]
        # for item in recommendations:
        #     item = item.item()
        #     if item in truth_items_for_user:
        #         num_intersect += 1
        # print(num_intersect)
        num_intersect = len(set([item.item() for item in recommendations]).intersection(truth_items[user_index]))
        recall = num_intersect / len(truth_items_for_user)
        total_recall += recall
    return total_recall / len(users)



In [101]:
from typing import Optional, Union

import torch
import torch.nn.functional as F
from torch import Tensor
from torch.nn import Embedding, ModuleList
from torch.nn.modules.loss import _Loss

from torch_geometric.nn.conv import LGConv
from torch_geometric.typing import Adj, OptTensor, SparseTensor

In [102]:
"""Adapted from https://pytorch-geometric.readthedocs.io/en/latest/_modules/torch_geometric/nn/models/lightgcn.html"""
class CustomLightGCN(torch.nn.Module):
    """From the <https://arxiv.org/abs/2002.02126>` paper.

    Args:
        num_nodes (int): The number of nodes in the graph.
        embedding_dim (int): The dimensionality of node embeddings.
        num_layers (int): The number of layers.
    """
    def __init__(
        self,
        num_nodes: int,
        embedding_dim: int,
        num_layers: int
    ):
        super().__init__()

        self.num_nodes = num_nodes
        self.embedding_dim = embedding_dim
        self.num_layers = num_layers
        self.embedding = Embedding(num_nodes, embedding_dim)
        self.alpha = torch.tensor([1. / (num_layers + 1)] * (num_layers + 1))
        self.convs = ModuleList([GATConv(embedding_dim, embedding_dim, heads=8, dropout=0.6) for _ in range(num_layers)])
        self.linears = ModuleList([Linear(embedding_dim * 8, embedding_dim) for _ in range(num_layers)])
        torch.nn.init.xavier_uniform_(self.embedding.weight)

    def get_embedding(self, edge_index):
        x = self.embedding.weight
        out = x * self.alpha[0]

        for i in range(self.num_layers):
            x = self.convs[i](x, edge_index)
            x = self.linears[i](x.view(-1, self.embedding_dim * 8))
            out = out + x * self.alpha[i + 1]

        return out


    def forward(self, edge_index):
        edge_label_index = edge_index
        out = self.get_embedding(edge_index)
        user = out[edge_label_index[0]]
        movie = out[edge_label_index[1]]
        return (user * movie).sum(dim=-1)


    def predict_link(self, edge_index, edge_label_index):
        "Predict links between nodes specified in edge_label_index."""
        pred = self(edge_index, edge_label_index).sigmoid()
        return pred.round()


    def recommend(self, edge_index, k):
        """Get top-k recommendations for nodes in src_index."""
        out_user = self.get_embedding(edge_index)
        out_movie = self.get_embedding(edge_index)
        pred = out_user @ out_movie.t()
        top_index = pred.topk(k, dim=-1).indices
        return top_index


    def link_pred_loss(self, pred, edge_label):
        """Computes the model loss for a link prediction using torch.nn.BCEWithLogitsLoss.
        
        Args:
            pred (torch.Tensor): The predictions.
            edge_label (torch.Tensor): The ground-truth edge labels.
        """
        loss_fn = torch.nn.BCEWithLogitsLoss()
        return loss_fn(pred, edge_label.to(pred.dtype))


    def recommendation_loss(self, pos_edge_rank, neg_edge_rank,
                            lambda_reg: float = 1e-4):
        """Computes the model loss for a ranking objective via the Bayesian
        Personalized Ranking (BPR) loss.

        Args:
            pos_edge_rank (torch.Tensor): Positive edge rankings.
            neg_edge_rank (torch.Tensor): Negative edge rankings.
            lambda_reg (int, optional): The L2 regularization strength
                of the Bayesian Personalized Ranking (BPR) loss.
        """
        loss_fn = BPRLoss(lambda_reg)
        return loss_fn(pos_edge_rank, neg_edge_rank, self.embedding.weight)

In [103]:
""" This is verbatim from https://pytorch-geometric.readthedocs.io/en/latest/_modules/torch_geometric/nn/models/lightgcn.html. """
class BPRLoss(_Loss):
    """The Bayesian Personalized Ranking (BPR) loss."""
    __constants__ = ['lambda_reg']
    lambda_reg: float

    def __init__(self, lambda_reg: float = 0, **kwargs):
        super().__init__(None, None, "sum", **kwargs)
        self.lambda_reg = 0

    def forward(self, positives: Tensor, negatives: Tensor,
                parameters: Tensor = None) -> Tensor:
        """Compute the mean Bayesian Personalized Ranking (BPR) loss.

        Args:
            positives (Tensor): The vector of positive-pair rankings.
            negatives (Tensor): The vector of negative-pair rankings.
            parameters (Tensor, optional): The tensor of parameters which
                should be used for :math:`L_2` regularization
                (default: :obj:`None`).
        """
        n_pairs = positives.size(0)
        log_prob = F.logsigmoid(positives - negatives).mean()
        regularization = 0

        if self.lambda_reg != 0:
            regularization = self.lambda_reg * parameters.norm(p=2).pow(2)

        return (-log_prob + regularization) / n_pairs

In [111]:
import numpy as np
import math
import matplotlib.pyplot as plt

NUM_LAYERS = 1
LR = 1e-1
BATCH_SIZE = min(4096, len(user_review_data))
EMBEDDING_DIM = 512
LOAD_CHECKPOINT = False
K = 20
model = LightGCN(num_nodes=num_nodes, embedding_dim=EMBEDDING_DIM, num_layers=NUM_LAYERS)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

if LOAD_CHECKPOINT:
    model.load_state_dict(torch.load(f'models/{EMBEDDING_DIM}_{NUM_LAYERS}_{1024}_{1e-3}_{num_train_users}_{143295}.pt', map_location=device))

print("Running on device: {}".format(device))
print(EMBEDDING_DIM)

optim = torch.optim.Adam(model.parameters(), lr=LR)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optim, gamma=0.95)
# scheduler = torch.optim.lr_scheduler.MultiStepLR(optim, milestones=[100, 200, 300, 400], gamma=0.5)
# scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optim, T_0=100)

train_positive_edges = train_graph.edge_index[:, train_graph.edge_attr >= 3.5].cuda()
train_negative_edges = train_graph.edge_index[:, train_graph.edge_attr <= 2.5].cuda()

validation_df = pd.DataFrame.from_dict(validation_reviews)
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter(comment=f'LightGCN_{EMBEDDING_DIM}_layers_{NUM_LAYERS}_batch_size_{BATCH_SIZE}_lr_{LR}_num_train_users_{num_train_users}_num_train_items_{num_train_items}_recall_{K}')

for epoch in range(10001):
    # we are using BPR so we go by user
    # We'll proceed in batches of users
    for start_idx in tqdm(range(0, num_train_users, BATCH_SIZE)):
        model.train()
        all_positive_rankings = torch.tensor([]).cuda()
        all_negative_rankings = torch.tensor([]).cuda()
        # randomly select a batch of users
        users_in_batch = torch.randperm(num_train_users)[start_idx:start_idx + BATCH_SIZE]
        # for each user randomly select a positive edge and 5 negative edges
        # use torch to do this efficiently
        for user_id in users_in_batch:
            # get one random positive edge
            user_positive_edges = train_positive_edges[:, train_positive_edges[0] == user_id]
            user_negative_edges = train_negative_edges[:, train_negative_edges[0] == user_id]
            if (user_positive_edges.shape[1] == 0 or user_negative_edges.shape[1] == 0):
                continue
            # randomly select a positive edge
            positive_edge = user_positive_edges[:, torch.randint(0, user_positive_edges.shape[1], (1,))]
            # randomly select 5 negative edges
            negative_edges = user_negative_edges[:, torch.randint(0, user_negative_edges.shape[1], (5,))]
            user_edges = torch.cat((positive_edge, negative_edges), dim=1)
            # get the rankings of the positive and negative edges
            user_rankings = model(user_edges)
            # compute the loss
            positive_rankings = user_rankings[0].unsqueeze(0).repeat(5)
            negative_rankings = user_rankings[1:]
            all_positive_rankings = torch.cat((all_positive_rankings, positive_rankings))
            all_negative_rankings = torch.cat((all_negative_rankings, negative_rankings))
        # compute the loss
        loss = model.recommendation_loss(all_positive_rankings, all_negative_rankings)
        # for user_id in users_in_batch:
        #     # get all the edges specific to this user
        #     user_positive_edges = train_positive_edges[:, train_positive_edges[0] == user_id]
        #     user_negative_edges = train_negative_edges[:, train_negative_edges[0] == user_id]
        #     if (user_positive_edges.shape[1] == 0 or user_negative_edges.shape[1] == 0):
        #         continue
        #     # limit the number of positive edges to 5000
        #     if (user_positive_edges.shape[1] > 5000):
        #         user_positive_edges = user_positive_edges[:, :5000]
        #     # Get at most 15000 negative edges
        #     if (user_negative_edges.shape[1] > 15000):
        #         user_negative_edges = user_negative_edges[:, :15000]
        #     # resample the negative edges if we don't have enough
        #     user_positive_edges, user_negative_edges = resample_edges_for_user(user_positive_edges, user_negative_edges)
        #     # concatenate the positive and negative edges
        #     user_edges = torch.cat([user_positive_edges, user_negative_edges], dim=1)
        #     # get the rankings for this user
        #     user_edges = user_edges.to(device)
        #     user_rankings = model(user_edges)
        #     # divide the rankings into positive and negative rankings
        #     user_positive_rankings = user_rankings[:user_positive_edges.shape[1]]
        #     user_negative_rankings = user_rankings[user_positive_edges.shape[1]:]
        #     # create all pairs of positive and negative rankings
        #     user_positive_rankings = user_positive_rankings.unsqueeze(1).repeat(1, user_negative_rankings.shape[0])
        #     user_negative_rankings = user_negative_rankings.unsqueeze(0).repeat(user_positive_rankings.shape[0], 1)
        #     # get the user loss
        #     user_loss = model.recommendation_loss(user_positive_rankings, user_negative_rankings, 1e-4)
        #     # add the user loss to the total loss
        #     loss = loss + user_loss
        # # divide the loss by the number of users
        # loss = loss / BATCH_SIZE
        # log the loss
        # backprop
        optim.zero_grad()
        loss.backward()
        optim.step()
        writer.add_scalar("Loss/train", loss, epoch * (num_train_users // BATCH_SIZE) + start_idx // BATCH_SIZE)
    if epoch % 50 == 0 and epoch > 0:
        # evaluate the model
        model.eval()
        # iterate over all users in the validation set
        validation_users = list(set([int(x) for x in validation_edges[0, :]]))
        # randomly select 1000 of the users
        validation_users = random.sample(validation_users, min(len(validation_users), 500))
        mean_ndcg = 0
        ndcg_scores = []
        for user in tqdm(validation_users):
            user_id = id_to_user[user]
            relevant_reviews = validation_df[validation_df['user_id'] == user_id]
            user_validation_edges = validation_edges[:, validation_edges[0] == user]
            user_validation_edges = user_validation_edges.to(device)
            user_rankings = model(user_validation_edges)
            edges_sorted = list(user_validation_edges[1, user_rankings.argsort(descending=True)])
            # use validation_df to get the relevances via the movie_id column and the movie_rating column
            relevances = []
            for edge in edges_sorted:
                movie_id = id_to_movie[int(edge)]
                if (movie_id in relevant_reviews['movie_id'].values):
                    relevances.append(relevant_reviews[relevant_reviews['movie_id'] == movie_id]['movie_rating'].values[0])
                else:
                    relevances.append(0)
            # calculate the ndcg
            if (len(relevances) >= K):
                ndcg = compute_ndcg_at_k(relevances, k=K)
            if (math.isnan(ndcg)):
                print(relevant_reviews)
                input()
            mean_ndcg += ndcg
            ndcg_scores.append(ndcg)
        mean_ndcg = mean_ndcg / len(validation_users)
        print("Standard Deviation: {}".format(np.std(ndcg_scores)))
        # create a histogram of the ndcg scores, make bins for each 0.1
        ndcg_scores = np.array(ndcg_scores).squeeze()
        writer.add_histogram("hist_NDCG/val", ndcg_scores, epoch)
        # also make a histogram in matplotlib and save as png
        plt.hist(ndcg_scores, bins=np.arange(0, 1.1, 0.1))
        plt.suptitle("Validation NDCG Histogram")
        # write information about the model to the histogram
        plt.title(f"Model: LightGCN, Embedding Dim: {EMBEDDING_DIM}, Num Layers: {NUM_LAYERS}, Batch Size: {BATCH_SIZE}, LR: {LR}, Num Train Users: {num_train_users}, Num Train Items: {num_train_items}", fontsize=8, wrap=True)
        plt.xlabel("NDCG")
        plt.ylabel("Frequency")
        # save the figure in the hist_NDCG folder, with the title having the model information and the epoch number
        plt.savefig(f"hist_NDCG/val_{EMBEDDING_DIM}_{NUM_LAYERS}_{BATCH_SIZE}_{LR}_{num_train_users}_{num_train_items}_{epoch}.png")
        plt.close()
        # Also save the raw NDCG scores to a csv file, with the model information in the title, and the epoch number
        np.savetxt(f"hist_NDCG/val_{EMBEDDING_DIM}_{NUM_LAYERS}_{BATCH_SIZE}_{LR}_{num_train_users}_{num_train_items}_{epoch}.csv", ndcg_scores, delimiter=",")
        print(mean_ndcg)
        writer.add_scalar("NDCG/val", mean_ndcg.item(), epoch * (num_train_users // BATCH_SIZE) + start_idx // BATCH_SIZE)
        recall_at_k = compute_recall_at_k(validation_graph, model, K)
        print(recall_at_k)
        writer.add_scalar("Recall@K/val", recall_at_k, epoch * (num_train_users // BATCH_SIZE) + start_idx // BATCH_SIZE)
        print("Epoch: {}, NDCG: {}, Recall@{}: {}".format(epoch, mean_ndcg, K, recall_at_k))
        average_number_of_matches = 0
        for user_id in validation_users:
            all_edges = torch.tensor([(user_id, item_id) for item_id in range(num_train_users, num_train_items)], dtype=torch.long).t().contiguous()
            recommendations = model.recommend(all_edges.to(device), src_index=torch.tensor([user_id]).to(device), dst_index=torch.tensor([x for x in range(num_train_users + 1, num_train_items)]).to(device), k=10)[0]
            movie_names = [movie_id_to_movie_name[id_to_movie[int(recommendation)]] for recommendation in recommendations]
            true_user_reviews = user_review_data[id_to_user[user_id]]
            matches = 0
            for movie_name in movie_names:
                if movie_name in true_user_reviews['movie_title'].values:
                    matches += 1
            average_number_of_matches += matches
        average_number_of_matches = average_number_of_matches / len(validation_users)
        print("Average number of matches: {}".format(average_number_of_matches))
        writer.add_scalar("Average number of matches", average_number_of_matches, epoch * (num_train_users // BATCH_SIZE) + start_idx // BATCH_SIZE)
        print("=====================================")
    scheduler.step()

Running on device: cuda
256


100%|██████████| 1/1 [00:01<00:00,  1.73s/it]
100%|██████████| 1/1 [00:01<00:00,  1.33s/it]
100%|██████████| 1/1 [00:01<00:00,  1.33s/it]
100%|██████████| 1/1 [00:01<00:00,  1.38s/it]
100%|██████████| 1/1 [00:01<00:00,  1.37s/it]
100%|██████████| 1/1 [00:01<00:00,  1.54s/it]
100%|██████████| 1/1 [00:01<00:00,  1.37s/it]
100%|██████████| 1/1 [00:01<00:00,  1.39s/it]
100%|██████████| 1/1 [00:01<00:00,  1.40s/it]
100%|██████████| 1/1 [00:01<00:00,  1.34s/it]
100%|██████████| 1/1 [00:01<00:00,  1.37s/it]
100%|██████████| 1/1 [00:01<00:00,  1.37s/it]
100%|██████████| 1/1 [00:01<00:00,  1.49s/it]
100%|██████████| 1/1 [00:01<00:00,  1.39s/it]
100%|██████████| 1/1 [00:01<00:00,  1.29s/it]
100%|██████████| 1/1 [00:01<00:00,  1.35s/it]
100%|██████████| 1/1 [00:01<00:00,  1.40s/it]
100%|██████████| 1/1 [00:01<00:00,  1.29s/it]
100%|██████████| 1/1 [00:01<00:00,  1.32s/it]
100%|██████████| 1/1 [00:01<00:00,  1.35s/it]
100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
100%|██████████| 1/1 [00:01<00:00,

Standard Deviation: 0.06404778441018344
0.8803228327080053
torch.Size([2, 6685])
User: 223_reviews.csv
['Kung Fu Panda 3', 'I, Daniel Blake', 'Midnight in Paris', 'Trolls Holiday', 'Thor: Ragnarok', 'Farewell My Concubine', 'In the Mood for Love', 'Hello Ghost', 'Everything Everywhere All at Once', 'Terrorizers', 'Portrait of a Lady on Fire', 'The Lobster', "Long Day's Journey Into Night", 'Titanic', 'Punch-Drunk Love', 'Nope', 'Fourth Place']
Computing recommendations for 200 users


  0%|          | 0/200 [00:00<?, ?it/s]

['Turning Red', 'Glass Onion: A Knives Out Mystery', 'Avatar: The Way of Water', 'The Batman', 'Everything Everywhere All at Once', 'Mean Girls', 'The Truman Show', 'Scott Pilgrim vs. the World', 'The Perks of Being a Wallflower', 'The Edge of Seventeen', 'Shutter Island', "Howl's Moving Castle", 'All Quiet on the Western Front', 'American Psycho', 'Soul', 'Jurassic Park', 'Forrest Gump', 'tick, tick...BOOM!', 'Girl, Interrupted', 'The Shining']


 50%|████▉     | 99/200 [00:07<00:07, 13.11it/s]

['Turning Red', 'Glass Onion: A Knives Out Mystery', 'Avatar: The Way of Water', 'The Batman', 'Everything Everywhere All at Once', 'Mean Girls', 'The Truman Show', 'Scott Pilgrim vs. the World', 'The Perks of Being a Wallflower', 'The Edge of Seventeen', 'Shutter Island', "Howl's Moving Castle", 'All Quiet on the Western Front', 'American Psycho', 'Soul', 'Jurassic Park', 'Forrest Gump', 'tick, tick...BOOM!', 'Girl, Interrupted', 'The Shining']
['Turning Red', 'Glass Onion: A Knives Out Mystery', 'Avatar: The Way of Water', 'The Batman', 'Everything Everywhere All at Once', 'Mean Girls', 'The Truman Show', 'Scott Pilgrim vs. the World', 'The Perks of Being a Wallflower', 'The Edge of Seventeen', 'Shutter Island', "Howl's Moving Castle", 'All Quiet on the Western Front', 'American Psycho', 'Soul', 'Jurassic Park', 'Forrest Gump', 'tick, tick...BOOM!', 'Girl, Interrupted', 'The Shining']


100%|██████████| 200/200 [00:14<00:00, 13.66it/s]


0.06598101962081238
Epoch: 50, NDCG: 0.8803228327080053, Recall@20: 0.06598101962081238
Average number of matches: 5.082857142857143


100%|██████████| 1/1 [00:01<00:00,  1.39s/it]
100%|██████████| 1/1 [00:01<00:00,  1.43s/it]
100%|██████████| 1/1 [00:01<00:00,  1.44s/it]
100%|██████████| 1/1 [00:01<00:00,  1.44s/it]
100%|██████████| 1/1 [00:01<00:00,  1.31s/it]
100%|██████████| 1/1 [00:01<00:00,  1.49s/it]
100%|██████████| 1/1 [00:01<00:00,  1.39s/it]
100%|██████████| 1/1 [00:01<00:00,  1.33s/it]
100%|██████████| 1/1 [00:01<00:00,  1.34s/it]
100%|██████████| 1/1 [00:01<00:00,  1.35s/it]
100%|██████████| 1/1 [00:01<00:00,  1.29s/it]
100%|██████████| 1/1 [00:01<00:00,  1.48s/it]
100%|██████████| 1/1 [00:01<00:00,  1.34s/it]
100%|██████████| 1/1 [00:01<00:00,  1.38s/it]
100%|██████████| 1/1 [00:01<00:00,  1.36s/it]
100%|██████████| 1/1 [00:01<00:00,  1.29s/it]
100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
100%|██████████| 1/1 [00:01<00:00,  1.37s/it]
100%|██████████| 1/1 [00:01<00:00,  1.38s/it]
100%|██████████| 1/1 [00:01<00:00,  1.36s/it]
100%|██████████| 1/1 [00:01<00:00,  1.34s/it]
100%|██████████| 1/1 [00:01<00:00,

Standard Deviation: 0.062495219977439204
0.8869869430771371
torch.Size([2, 6594])
User: 6789andre_reviews.csv
['Back to the Future Part III', 'Ratatouille', 'Toy Story 3', 'Come and See', 'Coraline', 'Inglourious Basterds', 'Kung Fu Panda', 'Paths of Glory', 'John Wick: Chapter 3 - Parabellum', 'Pay It Forward', 'Klaus', 'Reservoir Dogs', 'The Guardians of the Galaxy Holiday Special', 'Shrek', 'For a Few Dollars More', 'The Thin Red Line', 'Catch Me If You Can', 'Groundhog Day', 'The Silence of the Lambs', 'Platoon', 'Cars', 'Leaving Las Vegas', 'The Social Network', 'The Good, the Bad and the Ugly', 'The Matrix', 'Forrest Gump', 'Raiders of the Lost Ark', 'John Wick', 'Toy Story 2', 'Captain America: The Winter Soldier', 'Saving Private Ryan']
Computing recommendations for 200 users


  0%|          | 0/200 [00:00<?, ?it/s]

['Glass Onion: A Knives Out Mystery', 'La La Land', 'Knives Out', 'The Dark Knight', 'Get Out', 'Parasite', 'Turning Red', 'The Grand Budapest Hotel', 'Black Adam', 'Lady Bird', 'The Truman Show', 'Mad Max: Fury Road', 'Inglourious Basterds', 'Spirited Away', "Howl's Moving Castle", 'Dune', "Guillermo del Toro's Pinocchio", 'Eternal Sunshine of the Spotless Mind', 'Dead Poets Society', 'Inception']


 52%|█████▏    | 104/200 [00:07<00:05, 17.08it/s]

['Glass Onion: A Knives Out Mystery', 'La La Land', 'Knives Out', 'The Dark Knight', 'Get Out', 'Parasite', 'Turning Red', 'The Grand Budapest Hotel', 'Black Adam', 'Lady Bird', 'The Truman Show', 'Mad Max: Fury Road', 'Inglourious Basterds', 'Spirited Away', "Howl's Moving Castle", 'Dune', "Guillermo del Toro's Pinocchio", 'Eternal Sunshine of the Spotless Mind', 'Dead Poets Society', 'Inception']


100%|██████████| 200/200 [00:14<00:00, 13.96it/s]


0.08771430569549138
Epoch: 100, NDCG: 0.8869869430771371, Recall@20: 0.08771430569549138
Average number of matches: 5.365714285714286


100%|██████████| 1/1 [00:01<00:00,  1.62s/it]
100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
100%|██████████| 1/1 [00:01<00:00,  1.33s/it]
100%|██████████| 1/1 [00:01<00:00,  1.34s/it]
100%|██████████| 1/1 [00:01<00:00,  1.42s/it]
100%|██████████| 1/1 [00:01<00:00,  1.29s/it]
100%|██████████| 1/1 [00:01<00:00,  1.36s/it]
100%|██████████| 1/1 [00:01<00:00,  1.48s/it]
100%|██████████| 1/1 [00:01<00:00,  1.42s/it]
100%|██████████| 1/1 [00:01<00:00,  1.33s/it]
100%|██████████| 1/1 [00:01<00:00,  1.38s/it]
100%|██████████| 1/1 [00:01<00:00,  1.40s/it]
100%|██████████| 1/1 [00:01<00:00,  1.43s/it]
100%|██████████| 1/1 [00:01<00:00,  1.34s/it]
100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
100%|██████████| 1/1 [00:01<00:00,  1.33s/it]
100%|██████████| 1/1 [00:01<00:00,  1.40s/it]
100%|██████████| 1/1 [00:01<00:00,  1.32s/it]
100%|██████████| 1/1 [00:01<00:00,  1.35s/it]
100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
100%|██████████| 1/1 [00:01<00:00,

Standard Deviation: 0.062219206055680334
0.8884017964798591
torch.Size([2, 6553])
User: 2mad2chill_reviews.csv
['Joker', 'The Platform', 'Fight Club', 'Warrior', 'Blade Runner 2049', 'Blade Runner', 'The Grand Budapest Hotel', 'Memories of Murder', 'Good Will Hunting', 'Mustang', 'Lady Bird']
Computing recommendations for 200 users


  0%|          | 0/200 [00:00<?, ?it/s]

['Glass Onion: A Knives Out Mystery', 'Knives Out', 'Everything Everywhere All at Once', 'Fight Club', 'The Batman', 'The Dark Knight', 'Lady Bird', 'Hwarang: The Poet Warrior Youth', 'The Grand Budapest Hotel', 'Spirited Away', 'La La Land', 'Pulp Fiction', 'Get Out', 'The Truman Show', 'Ratatouille', 'Spree', 'Scott Pilgrim vs. the World', 'Dune', 'Taxi Driver', 'Avengers: Endgame']


100%|██████████| 200/200 [00:14<00:00, 13.43it/s]


0.07117751775146658
Epoch: 150, NDCG: 0.8884017964798591, Recall@20: 0.07117751775146658
Average number of matches: 5.357142857142857


100%|██████████| 1/1 [00:01<00:00,  1.50s/it]
100%|██████████| 1/1 [00:01<00:00,  1.40s/it]
100%|██████████| 1/1 [00:01<00:00,  1.39s/it]
100%|██████████| 1/1 [00:01<00:00,  1.38s/it]
100%|██████████| 1/1 [00:01<00:00,  1.39s/it]
100%|██████████| 1/1 [00:01<00:00,  1.34s/it]
100%|██████████| 1/1 [00:01<00:00,  1.33s/it]
100%|██████████| 1/1 [00:01<00:00,  1.38s/it]
100%|██████████| 1/1 [00:01<00:00,  1.31s/it]
100%|██████████| 1/1 [00:01<00:00,  1.36s/it]
100%|██████████| 1/1 [00:01<00:00,  1.37s/it]
100%|██████████| 1/1 [00:01<00:00,  1.37s/it]
100%|██████████| 1/1 [00:01<00:00,  1.34s/it]
100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
100%|██████████| 1/1 [00:01<00:00,  1.32s/it]
100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
100%|██████████| 1/1 [00:01<00:00,  1.36s/it]
100%|██████████| 1/1 [00:01<00:00,  1.40s/it]
100%|██████████| 1/1 [00:01<00:00,  1.29s/it]
100%|██████████| 1/1 [00:01<00:00,  1.33s/it]
100%|██████████| 1/1 [00:01<00:00,  1.36s/it]
100%|██████████| 1/1 [00:01<00:00,

Standard Deviation: 0.06209804140015314
0.8884278255573627
torch.Size([2, 6611])
User: 23minutes_reviews.csv
["Ferris Bueller's Day Off", 'El Mariachi', 'Psycho', 'Dirty Harry', 'They Live', 'The Muppet Movie', 'Scrooged', 'Night of the Living Dead', 'Howards End', 'From Beyond', 'Boyz n the Hood', 'Akira', 'Phenomena', 'Two Way Stretch', 'E.T. the Extra-Terrestrial', 'Star Wars', 'The Fly', 'Friday the 13th: The Final Chapter', 'The Godfather: Part II', 'The Brood', 'The Hills Have Eyes', 'The Meaning of Life', 'The Terminator', 'The Third Man', 'The Hitcher']
Computing recommendations for 200 users


  0%|          | 0/200 [00:00<?, ?it/s]

['Glass Onion: A Knives Out Mystery', 'Boyhood', 'Lady Bird', 'Get Out', 'La La Land', "The King's Man", 'Fight Club', 'The Batman', 'The Grand Budapest Hotel', 'Knock at the Cabin', "To All the Boys I've Loved Before", 'The Dark Knight', 'Spree', 'Dune', 'Annihilation', 'Spirited Away', 'Black Adam', 'Anna', 'Enola Holmes', 'Taxi Driver']


 65%|██████▌   | 130/200 [00:09<00:05, 13.64it/s]

['Glass Onion: A Knives Out Mystery', 'Boyhood', 'Lady Bird', 'Get Out', 'La La Land', "The King's Man", 'Fight Club', 'The Batman', 'The Grand Budapest Hotel', 'Knock at the Cabin', "To All the Boys I've Loved Before", 'The Dark Knight', 'Spree', 'Dune', 'Annihilation', 'Spirited Away', 'Black Adam', 'Anna', 'Enola Holmes', 'Taxi Driver']


100%|██████████| 200/200 [00:14<00:00, 13.72it/s]


0.07371586492180504
Epoch: 200, NDCG: 0.8884278255573627, Recall@20: 0.07371586492180504
Average number of matches: 5.36


100%|██████████| 1/1 [00:01<00:00,  1.58s/it]
100%|██████████| 1/1 [00:01<00:00,  1.41s/it]
100%|██████████| 1/1 [00:01<00:00,  1.39s/it]
100%|██████████| 1/1 [00:01<00:00,  1.35s/it]
100%|██████████| 1/1 [00:01<00:00,  1.40s/it]
100%|██████████| 1/1 [00:01<00:00,  1.33s/it]
100%|██████████| 1/1 [00:01<00:00,  1.44s/it]
100%|██████████| 1/1 [00:01<00:00,  1.40s/it]
100%|██████████| 1/1 [00:01<00:00,  1.32s/it]
100%|██████████| 1/1 [00:01<00:00,  1.36s/it]
100%|██████████| 1/1 [00:01<00:00,  1.36s/it]
100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
100%|██████████| 1/1 [00:01<00:00,  1.29s/it]
100%|██████████| 1/1 [00:01<00:00,  1.40s/it]
100%|██████████| 1/1 [00:01<00:00,  1.35s/it]
100%|██████████| 1/1 [00:01<00:00,  1.31s/it]
100%|██████████| 1/1 [00:01<00:00,  1.35s/it]
100%|██████████| 1/1 [00:01<00:00,  1.32s/it]
100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
100%|██████████| 1/1 [00:01<00:00,  1.39s/it]
100%|██████████| 1/1 [00:01<00:00,  1.32s/it]
100%|██████████| 1/1 [00:01<00:00,

Standard Deviation: 0.06205472151279737
0.88848333551898
torch.Size([2, 6707])
User: 1morena3_reviews.csv
['Mulholland Drive', 'Thor: Ragnarok', "There's a Man in the Woods", 'Knives Out', 'El Angel', 'WandaVision', 'Moulin Rouge!', 'Perfect Blue', 'The Exterminating Angel', 'The Texas Chain Saw Massacre', 'Another Round', 'Rojo', 'Akira', 'Requiem for a Dream', 'Bad Education', 'Shutter Island', 'The French Dispatch', 'Barbarian', 'Suspiria', 'Stand by Me', 'Coffee and Cigarettes', 'The Secret in Their Eyes', 'Memento', 'Man Facing Southeast', 'Adaptation.']
Computing recommendations for 200 users


  0%|          | 0/200 [00:00<?, ?it/s]

['Knives Out', 'Whiplash', 'The Batman', 'The Dark Knight', 'Fight Club', 'The Grand Budapest Hotel', 'Turning Red', 'Spirited Away', 'Avatar: The Way of Water', 'Mad Max: Fury Road', 'Pulp Fiction', 'Ratatouille', 'Shutter Island', 'Scott Pilgrim vs. the World', 'Top Gun: Maverick', 'Eternal Sunshine of the Spotless Mind', 'LÃ©on: The Professional', 'Spree', 'Forrest Gump', 'Memento']


100%|██████████| 200/200 [00:14<00:00, 13.83it/s]


0.07571118714473979
Epoch: 250, NDCG: 0.88848333551898, Recall@20: 0.07571118714473979
Average number of matches: 5.36


100%|██████████| 1/1 [00:01<00:00,  1.38s/it]
100%|██████████| 1/1 [00:01<00:00,  1.41s/it]
100%|██████████| 1/1 [00:01<00:00,  1.31s/it]
100%|██████████| 1/1 [00:01<00:00,  1.31s/it]
100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
100%|██████████| 1/1 [00:01<00:00,  1.45s/it]
100%|██████████| 1/1 [00:01<00:00,  1.29s/it]
100%|██████████| 1/1 [00:01<00:00,  1.49s/it]
100%|██████████| 1/1 [00:01<00:00,  1.34s/it]
100%|██████████| 1/1 [00:01<00:00,  1.32s/it]
100%|██████████| 1/1 [00:01<00:00,  1.35s/it]
100%|██████████| 1/1 [00:01<00:00,  1.29s/it]
100%|██████████| 1/1 [00:01<00:00,  1.36s/it]
100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
100%|██████████| 1/1 [00:01<00:00,  1.37s/it]
100%|██████████| 1/1 [00:01<00:00,  1.33s/it]
100%|██████████| 1/1 [00:01<00:00,  1.31s/it]
100%|██████████| 1/1 [00:01<00:00,  1.37s/it]
100%|██████████| 1/1 [00:01<00:00,  1.49s/it]
100%|██████████| 1/1 [00:01<00:00,  1.34s/it]
100%|██████████| 1/1 [00:01<00:00,  1.39s/it]
100%|██████████| 1/1 [00:01<00:00,

Standard Deviation: 0.0620727721422019
0.8884503437277618
torch.Size([2, 6658])
User: 6renda_reviews.csv
['Ratatouille', 'Maze Runner: The Scorch Trials', 'The Twilight Saga: New Moon', 'Fear Street: 1666', 'The Boy in the Striped Pyjamas', 'Harry Potter and the Order of the Phoenix', 'Harry Potter and the Goblet of Fire', 'What Happened to Monday', 'Legally Blonde', 'The Black Phone', 'The Conjuring', 'White Chicks', 'Maze Runner: The Death Cure', 'Maleficent', 'The Twilight Saga: Breaking Dawn - Part 2', 'The Chronicles of Narnia: The Voyage of the Dawn Treader', 'Harry Potter and the Deathly Hallows: Part 1', 'The Twilight Saga: Eclipse', 'Ponyo', 'Monster House', 'Now You See Me 2', 'Project X', 'Alice in Wonderland', 'Avengers: Endgame', '1917', '13 Going on 30', 'Ready or Not', 'X', 'Corpse Bride', 'Midsommar']
Computing recommendations for 200 users


  2%|▏         | 4/200 [00:00<00:11, 16.60it/s]

['Avatar: The Way of Water', 'Turning Red', 'The Virgin Suicides', 'Irreversible', 'Knives Out', 'Thor: Love and Thunder', 'Whiplash', 'Glass Onion: A Knives Out Mystery', 'Easter Sunday', 'The Killing of a Sacred Deer', 'The Batman', 'Parasite', 'Everything Everywhere All at Once', 'Avengers: Age of Ultron', 'Home Alone', 'The Dark Knight', 'The Perks of Being a Wallflower', 'Sorry to Bother You', 'Spider-Man: No Way Home', 'Midsommar']


 34%|███▎      | 67/200 [00:05<00:09, 13.35it/s]

['Avatar: The Way of Water', 'Turning Red', 'The Virgin Suicides', 'Irreversible', 'Knives Out', 'Thor: Love and Thunder', 'Whiplash', 'Glass Onion: A Knives Out Mystery', 'Easter Sunday', 'The Killing of a Sacred Deer', 'The Batman', 'Parasite', 'Everything Everywhere All at Once', 'Avengers: Age of Ultron', 'Home Alone', 'The Dark Knight', 'The Perks of Being a Wallflower', 'Sorry to Bother You', 'Spider-Man: No Way Home', 'Midsommar']


100%|██████████| 200/200 [00:14<00:00, 13.58it/s]


0.06737109872768
Epoch: 300, NDCG: 0.8884503437277618, Recall@20: 0.06737109872768
Average number of matches: 5.36


100%|██████████| 1/1 [00:01<00:00,  1.56s/it]
100%|██████████| 1/1 [00:01<00:00,  1.39s/it]
100%|██████████| 1/1 [00:01<00:00,  1.34s/it]
100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
100%|██████████| 1/1 [00:01<00:00,  1.41s/it]
100%|██████████| 1/1 [00:01<00:00,  1.29s/it]
100%|██████████| 1/1 [00:01<00:00,  1.29s/it]
100%|██████████| 1/1 [00:01<00:00,  1.29s/it]
100%|██████████| 1/1 [00:01<00:00,  1.39s/it]
100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
100%|██████████| 1/1 [00:01<00:00,  1.32s/it]
100%|██████████| 1/1 [00:01<00:00,  1.33s/it]
100%|██████████| 1/1 [00:01<00:00,  1.41s/it]
100%|██████████| 1/1 [00:01<00:00,  1.45s/it]
100%|██████████| 1/1 [00:01<00:00,  1.35s/it]
100%|██████████| 1/1 [00:01<00:00,  1.44s/it]
100%|██████████| 1/1 [00:01<00:00,  1.40s/it]
100%|██████████| 1/1 [00:01<00:00,  1.39s/it]
100%|██████████| 1/1 [00:01<00:00,  1.33s/it]
100%|██████████| 1/1 [00:01<00:00,  1.32s/it]
100%|██████████| 1/1 [00:01<00:00,  1.36s/it]
100%|██████████| 1/1 [00:01<00:00,

Standard Deviation: 0.062054529955538736
0.8886445148971284
torch.Size([2, 6654])
User: 09wynna_reviews.csv
['The Shining', 'The Raid', 'Return of the Jedi', 'The X Files', 'Inside Out', 'Pulp Fiction', 'Lilo & Stitch', '28 Days Later', 'Glass Onion: A Knives Out Mystery', 'ERASED', 'The Silence of the Lambs', 'The Lord of the Rings: The Return of the King', 'The Last Samurai', 'The Muppet Christmas Carol', 'Little Miss Sunshine', 'Parasite', 'Back to the Future', 'Jojo Rabbit', 'The Lighthouse', 'X', 'No Time to Die', 'Shang-Chi and the Legend of the Ten Rings', 'Togo', 'Skyfall', 'Sound of Metal', 'Midsommar', 'The Truman Show', 'The Thing']
Computing recommendations for 200 users


  2%|▏         | 4/200 [00:00<00:11, 16.80it/s]

['Glass Onion: A Knives Out Mystery', 'Parasite', 'Avatar: The Way of Water', 'Turning Red', 'Avengers: Endgame', 'Get Out', 'Lady Bird', 'The Truman Show', 'Shutter Island', 'Mad Max: Fury Road', 'Pulp Fiction', 'Top Gun: Maverick', 'Spree', 'Eternal Sunshine of the Spotless Mind', 'Django Unchained', 'Coco', 'Gone Girl', "Guillermo del Toro's Pinocchio", 'Scott Pilgrim vs. the World', 'Memento']


  4%|▍         | 9/200 [00:00<00:11, 16.98it/s]

['Glass Onion: A Knives Out Mystery', 'Parasite', 'Avatar: The Way of Water', 'Turning Red', 'Avengers: Endgame', 'Get Out', 'Lady Bird', 'The Truman Show', 'Shutter Island', 'Mad Max: Fury Road', 'Pulp Fiction', 'Top Gun: Maverick', 'Spree', 'Eternal Sunshine of the Spotless Mind', 'Django Unchained', 'Coco', 'Gone Girl', "Guillermo del Toro's Pinocchio", 'Scott Pilgrim vs. the World', 'Memento']


 42%|████▏     | 83/200 [00:06<00:10, 11.43it/s]

['Glass Onion: A Knives Out Mystery', 'Parasite', 'Avatar: The Way of Water', 'Turning Red', 'Avengers: Endgame', 'Get Out', 'Lady Bird', 'The Truman Show', 'Shutter Island', 'Mad Max: Fury Road', 'Pulp Fiction', 'Top Gun: Maverick', 'Spree', 'Eternal Sunshine of the Spotless Mind', 'Django Unchained', 'Coco', 'Gone Girl', "Guillermo del Toro's Pinocchio", 'Scott Pilgrim vs. the World', 'Memento']


100%|██████████| 200/200 [00:14<00:00, 13.57it/s]


0.07000605063787811
Epoch: 350, NDCG: 0.8886445148971284, Recall@20: 0.07000605063787811
Average number of matches: 5.36


100%|██████████| 1/1 [00:01<00:00,  1.42s/it]
100%|██████████| 1/1 [00:01<00:00,  1.31s/it]
100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
100%|██████████| 1/1 [00:01<00:00,  1.39s/it]
100%|██████████| 1/1 [00:01<00:00,  1.39s/it]
100%|██████████| 1/1 [00:01<00:00,  1.45s/it]
100%|██████████| 1/1 [00:01<00:00,  1.42s/it]
100%|██████████| 1/1 [00:01<00:00,  1.42s/it]
100%|██████████| 1/1 [00:01<00:00,  1.32s/it]
100%|██████████| 1/1 [00:01<00:00,  1.31s/it]
100%|██████████| 1/1 [00:01<00:00,  1.41s/it]
100%|██████████| 1/1 [00:01<00:00,  1.42s/it]
100%|██████████| 1/1 [00:01<00:00,  1.38s/it]
100%|██████████| 1/1 [00:01<00:00,  1.36s/it]
100%|██████████| 1/1 [00:01<00:00,  1.38s/it]
100%|██████████| 1/1 [00:01<00:00,  1.34s/it]
100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
100%|██████████| 1/1 [00:01<00:00,  1.35s/it]
100%|██████████| 1/1 [00:01<00:00,  1.37s/it]
100%|██████████| 1/1 [00:01<00:00,  1.41s/it]
100%|██████████| 1/1 [00:01<00:00,  1.41s/it]
100%|██████████| 1/1 [00:01<00:00,

Standard Deviation: 0.062132017945506955
0.8887486186419744
torch.Size([2, 6627])
User: 58hutchi_reviews.csv
['Seven Samurai', 'Hannah and Her Sisters', 'The Conversation', 'Michael Clayton', 'A Gray State', 'The Cheat', 'A Foreign Affair', 'Apocalypse Now', 'The Rules of the Game', "Singin' in the Rain", 'The Aviator', 'Groundhog Day', 'Shame', 'I Am Cuba', '2001: A Space Odyssey']
Computing recommendations for 200 users


  2%|▏         | 4/200 [00:00<00:13, 14.53it/s]

['Black Adam', 'Avatar: The Way of Water', "The King's Man", 'Sixteen Candles', 'Thor: Love and Thunder', 'Employee of the Month', 'Captain Marvel', 'Enola Holmes', 'The Power of the Dog', "To All the Boys I've Loved Before", 'Knock at the Cabin', 'Hereditary', 'The Mist', 'Do Revenge', 'Avengers: Age of Ultron', 'Before I Fall', 'La La Land', 'Anna', 'Iron Man 3', 'Triangle of Sadness']


100%|██████████| 200/200 [00:14<00:00, 13.60it/s]


0.07700816048160122
Epoch: 400, NDCG: 0.8887486186419744, Recall@20: 0.07700816048160122
Average number of matches: 5.36


100%|██████████| 1/1 [00:01<00:00,  1.42s/it]
100%|██████████| 1/1 [00:01<00:00,  1.50s/it]
100%|██████████| 1/1 [00:01<00:00,  1.45s/it]
100%|██████████| 1/1 [00:01<00:00,  1.44s/it]
100%|██████████| 1/1 [00:01<00:00,  1.36s/it]
100%|██████████| 1/1 [00:01<00:00,  1.41s/it]
100%|██████████| 1/1 [00:01<00:00,  1.40s/it]
100%|██████████| 1/1 [00:01<00:00,  1.37s/it]
100%|██████████| 1/1 [00:01<00:00,  1.39s/it]
100%|██████████| 1/1 [00:01<00:00,  1.34s/it]
100%|██████████| 1/1 [00:01<00:00,  1.34s/it]
100%|██████████| 1/1 [00:01<00:00,  1.36s/it]
100%|██████████| 1/1 [00:01<00:00,  1.42s/it]
100%|██████████| 1/1 [00:01<00:00,  1.47s/it]
100%|██████████| 1/1 [00:01<00:00,  1.35s/it]
100%|██████████| 1/1 [00:01<00:00,  1.44s/it]
100%|██████████| 1/1 [00:01<00:00,  1.37s/it]
100%|██████████| 1/1 [00:01<00:00,  1.34s/it]
100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
100%|██████████| 1/1 [00:01<00:00,  1.34s/it]
100%|██████████| 1/1 [00:01<00:00,  1.36s/it]
100%|██████████| 1/1 [00:01<00:00,

Standard Deviation: 0.0621588645927732
0.8887729298783651
torch.Size([2, 6610])
User: 2goofs_reviews.csv
['Living in Oblivion', 'The Killing of a Chinese Bookie', 'After Hours', 'The Green Knight', 'Good Time', 'The Family Stone', 'Perfect Blue', 'Daisies', 'Monster', 'Chernobyl', 'Under the Silver Lake', 'Taxi Driver', 'TÃ\x81R', 'Honey Boy', 'Vagabond', "Who's Afraid of Virginia Woolf?", 'Sound of Metal', 'Licorice Pizza', 'The Mummy']
Computing recommendations for 200 users


  2%|▏         | 4/200 [00:00<00:13, 15.06it/s]

['Knives Out', 'Whiplash', 'Glass Onion: A Knives Out Mystery', 'The Batman', 'The Dark Knight', 'Fight Club', 'Turning Red', 'The Grand Budapest Hotel', 'Lady Bird', 'La La Land', 'Get Out', 'Spirited Away', 'Avatar: The Way of Water', 'Dune', 'The Truman Show', 'Shutter Island', 'Taxi Driver', 'Mad Max: Fury Road', 'Inglourious Basterds', 'Pulp Fiction']


 53%|█████▎    | 106/200 [00:07<00:06, 14.00it/s]

['Knives Out', 'Whiplash', 'Glass Onion: A Knives Out Mystery', 'The Batman', 'The Dark Knight', 'Fight Club', 'Turning Red', 'The Grand Budapest Hotel', 'Lady Bird', 'La La Land', 'Get Out', 'Spirited Away', 'Avatar: The Way of Water', 'Dune', 'The Truman Show', 'Shutter Island', 'Taxi Driver', 'Mad Max: Fury Road', 'Inglourious Basterds', 'Pulp Fiction']


 55%|█████▌    | 110/200 [00:08<00:06, 12.94it/s]

['Knives Out', 'Whiplash', 'Glass Onion: A Knives Out Mystery', 'The Batman', 'The Dark Knight', 'Fight Club', 'Turning Red', 'The Grand Budapest Hotel', 'Lady Bird', 'La La Land', 'Get Out', 'Spirited Away', 'Avatar: The Way of Water', 'Dune', 'The Truman Show', 'Shutter Island', 'Taxi Driver', 'Mad Max: Fury Road', 'Inglourious Basterds', 'Pulp Fiction']


100%|██████████| 200/200 [00:14<00:00, 13.48it/s]


0.07103304286504923
Epoch: 450, NDCG: 0.8887729298783651, Recall@20: 0.07103304286504923
Average number of matches: 5.36


100%|██████████| 1/1 [00:01<00:00,  1.41s/it]
100%|██████████| 1/1 [00:01<00:00,  1.40s/it]
100%|██████████| 1/1 [00:01<00:00,  1.38s/it]
100%|██████████| 1/1 [00:01<00:00,  1.36s/it]
100%|██████████| 1/1 [00:01<00:00,  1.46s/it]
100%|██████████| 1/1 [00:01<00:00,  1.45s/it]
100%|██████████| 1/1 [00:01<00:00,  1.43s/it]
100%|██████████| 1/1 [00:01<00:00,  1.40s/it]
100%|██████████| 1/1 [00:01<00:00,  1.45s/it]
100%|██████████| 1/1 [00:01<00:00,  1.44s/it]
100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
100%|██████████| 1/1 [00:01<00:00,  1.35s/it]
100%|██████████| 1/1 [00:01<00:00,  1.43s/it]
100%|██████████| 1/1 [00:01<00:00,  1.36s/it]
100%|██████████| 1/1 [00:01<00:00,  1.35s/it]
100%|██████████| 1/1 [00:01<00:00,  1.35s/it]
100%|██████████| 1/1 [00:01<00:00,  1.37s/it]
100%|██████████| 1/1 [00:01<00:00,  1.39s/it]
100%|██████████| 1/1 [00:01<00:00,  1.43s/it]
100%|██████████| 1/1 [00:01<00:00,  1.24s/it]
100%|██████████| 1/1 [00:01<00:00,  1.40s/it]
100%|██████████| 1/1 [00:01<00:00,

Standard Deviation: 0.0621570982039993
0.8887714176524009
torch.Size([2, 6562])
User: 6kamikaze9_reviews.csv
['Puparia', 'A Monster Calls', 'Dark Shadows', 'Saw', 'Edward Scissorhands', "Miss Peregrine's Home for Peculiar Children", 'Champions', 'Dahmer â\x80\x93 Monster: The Jeffrey Dahmer Story', 'Puss in Boots', 'Moon Knight', 'Death Proof', 'Kimi no Iro', 'The Stranger by the Shore', 'Colorful', 'Jumanji: The Next Level', 'Alvin and the Chipmunks: The Road Chip', 'From Dusk Till Dawn', 'Joker', "Zip & Zap and the Captain's Island", 'Top Gun']
Computing recommendations for 200 users


  0%|          | 0/200 [00:00<?, ?it/s]

['Whiplash', 'Glass Onion: A Knives Out Mystery', 'The French Dispatch', 'Everything Everywhere All at Once', 'The Grand Budapest Hotel', 'Fight Club', 'Lady Bird', 'The Piano Teacher', 'The Dark Knight', 'Parasite', 'The Batman', 'La La Land', 'Spirited Away', 'Get Out', 'Spree', 'Scott Pilgrim vs. the World', 'Inglourious Basterds', 'Dune', 'Mad Max: Fury Road', 'Top Gun: Maverick']


100%|██████████| 200/200 [00:14<00:00, 13.53it/s]


0.07565412724795317
Epoch: 500, NDCG: 0.8887714176524009, Recall@20: 0.07565412724795317
Average number of matches: 5.36


100%|██████████| 1/1 [00:01<00:00,  1.51s/it]
100%|██████████| 1/1 [00:01<00:00,  1.38s/it]
100%|██████████| 1/1 [00:01<00:00,  1.41s/it]
100%|██████████| 1/1 [00:01<00:00,  1.40s/it]
100%|██████████| 1/1 [00:01<00:00,  1.33s/it]
100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
100%|██████████| 1/1 [00:01<00:00,  1.20s/it]
100%|██████████| 1/1 [00:01<00:00,  1.19s/it]
100%|██████████| 1/1 [00:01<00:00,  1.19s/it]
100%|██████████| 1/1 [00:01<00:00,  1.22s/it]
100%|██████████| 1/1 [00:01<00:00,  1.19s/it]
100%|██████████| 1/1 [00:01<00:00,  1.22s/it]
100%|██████████| 1/1 [00:01<00:00,  1.21s/it]
100%|██████████| 1/1 [00:01<00:00,  1.20s/it]
100%|██████████| 1/1 [00:01<00:00,  1.24s/it]
100%|██████████| 1/1 [00:01<00:00,  1.19s/it]
100%|██████████| 1/1 [00:01<00:00,  1.25s/it]
100%|██████████| 1/1 [00:01<00:00,  1.29s/it]
100%|██████████| 1/1 [00:01<00:00,  1.20s/it]
100%|██████████| 1/1 [00:01<00:00,  1.20s/it]
100%|██████████| 1/1 [00:01<00:00,  1.19s/it]
100%|██████████| 1/1 [00:01<00:00,

Standard Deviation: 0.06206550076630591
0.8886659323718712
torch.Size([2, 6718])
User: 0becalp_reviews.csv
['The Devil Wears Prada', 'My Love From Another Star', 'The Perks of Being a Wallflower', 'Doctor Strange in the Multiverse of Madness', "Howl's Moving Castle", 'Venom', 'Labyrinth', 'Emergency Couple', 'Spirited Away', 'Fantastic Beasts: The Crimes of Grindelwald', 'Search: WWW', 'Sense and Sensibility', 'Hercules', 'Shang-Chi and the Legend of the Ten Rings', "Master's Sun", 'The Day Naruto Became Hokage', 'Spellbound', "But I'm a Cheerleader", 'Along with the Gods: The Two Worlds']
Computing recommendations for 200 users


  0%|          | 0/200 [00:00<?, ?it/s]

['Knives Out', 'Whiplash', 'Glass Onion: A Knives Out Mystery', 'Everything Everywhere All at Once', 'The Batman', 'Turning Red', 'The Dark Knight', 'Fight Club', 'Get Out', 'The Grand Budapest Hotel', 'Lady Bird', 'Spirited Away', 'La La Land', 'Avatar: The Way of Water', 'Dune', 'Taxi Driver', 'The Truman Show', 'Mad Max: Fury Road', 'Shutter Island', 'Scott Pilgrim vs. the World']


 32%|███▏      | 64/200 [00:04<00:08, 16.91it/s]

['Knives Out', 'Whiplash', 'Glass Onion: A Knives Out Mystery', 'Everything Everywhere All at Once', 'The Batman', 'Turning Red', 'The Dark Knight', 'Fight Club', 'Get Out', 'The Grand Budapest Hotel', 'Lady Bird', 'Spirited Away', 'La La Land', 'Avatar: The Way of Water', 'Dune', 'Taxi Driver', 'The Truman Show', 'Mad Max: Fury Road', 'Shutter Island', 'Scott Pilgrim vs. the World']


 36%|███▋      | 73/200 [00:05<00:09, 13.61it/s]


KeyboardInterrupt: 

In [None]:
# save the model
torch.save(model.state_dict(), f"models/{EMBEDDING_DIM}_{NUM_LAYERS}_{BATCH_SIZE}_{LR}_{num_train_users}_{num_train_items}.pt")

In [None]:
for param_group in optim.param_groups:
    print(param_group['lr'])

4.376630903760431e-05


In [None]:
validation_users = list(set([int(x) for x in validation_edges[0, :]]))
validation_df[validation_df.user_id == id_to_user[0]]

In [None]:
validation_edges[:, validation_edges[0] == 0]

In [None]:
def get_user_positive_items(edge_index):
    """Generates dictionary of positive items for each user

    Args:
        edge_index (torch.Tensor): 2 by N list of edges

    Returns:
        dict: dictionary of positive items for each user
    """
    user_pos_items = {}
    for i in range(edge_index.shape[1]):
        user = edge_index[0][i].item()
        item = edge_index[1][i].item()
        if user not in user_pos_items:
            user_pos_items[user] = []
        user_pos_items[user].append(item)
    return user_pos_items

In [None]:
print()