# Global Settings and Imports

In [None]:
# Colab-only setup: mount Google Drive and make the LightGCN project folder the
# working directory, so relative paths used later (e.g. `yaml_file`) resolve there.
import os

from google.colab import drive

drive.mount("/content/gdrive", force_remount=True)
os.chdir("/content/gdrive/MyDrive/Colab Notebooks/LightGCN")

Mounted at /content/gdrive


In [None]:
!pip install scrapbook
!pip install retrying
!pip install pandera

Collecting scrapbook
  Downloading scrapbook-0.5.0-py3-none-any.whl (34 kB)
Collecting papermill (from scrapbook)
  Downloading papermill-2.5.0-py3-none-any.whl (38 kB)
Collecting jedi>=0.16 (from ipython->scrapbook)
  Downloading jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: jedi, papermill, scrapbook
Successfully installed jedi-0.19.1 papermill-2.5.0 scrapbook-0.5.0
Collecting retrying
  Downloading retrying-1.3.4-py3-none-any.whl (11 kB)
Installing collected packages: retrying
Successfully installed retrying-1.3.4
Collecting pandera
  Downloading pandera-0.18.0-py3-none-any.whl (209 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.0/209.0 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting multimethod (from pandera)
  Downloading multimethod-1.10-py3-none-any.whl (9.9 kB)
Collecting typeguard>=3.0.2 (from p

In [None]:
# Standard-library and third-party dependencies.
import sys
import scrapbook as sb
import pandas as pd
import numpy as np
import tensorflow as tf
tf.get_logger().setLevel('ERROR') # only show error messages
# NOTE(review): the log level is set before the recommenders imports below,
# presumably so TensorFlow warnings raised while importing them are hidden too —
# confirm before reordering these lines.

# Recommenders pieces used in this notebook: timing helper, LightGCN model and
# its ImplicitCF data wrapper, MovieLens loader, stratified splitter, ranking
# metrics, the library's default seed, and the hyper-parameter loader.
from recommenders.utils.timer import Timer
from recommenders.models.deeprec.models.graphrec.lightgcn import LightGCN
from recommenders.models.deeprec.DataModel.ImplicitCF import ImplicitCF
from recommenders.datasets import movielens
from recommenders.datasets.python_splitters import python_stratified_split
from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k
from recommenders.utils.constants import SEED as DEFAULT_SEED
from recommenders.models.deeprec.deeprec_utils import prepare_hparams

# Record the interpreter and key library versions next to the results.
print("System version: {}".format(sys.version))
print("Pandas version: {}".format(pd.__version__))
print("Tensorflow version: {}".format(tf.__version__))

System version: 3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]
Pandas version: 1.5.3
Tensorflow version: 2.14.0


In [None]:
# Number of items to recommend per user; also used as the cutoff K for the
# evaluation metrics and passed to prepare_hparams as top_k.
TOP_K = 10

# Select MovieLens data size: 100k, 1m, 10m, or 20m
MOVIELENS_DATA_SIZE = '100k'
# NOTE(review): DATA_SIZE is not referenced by any cell visible here and the
# meaning of 1584082 is undocumented — confirm what it represents or remove it.
DATA_SIZE = 1584082

# Model parameters (forwarded to prepare_hparams as epochs / batch_size)
EPOCHS = 30
BATCH_SIZE = 512

# Seed passed to ImplicitCF and to every LightGCN instance in this notebook.
SEED = DEFAULT_SEED  # Set None for non-deterministic results
# Alternative seed (disabled):
# SEED= 39

# LightGCN config file; the path is relative, so it resolves against the
# working directory set by os.chdir in the Colab setup cell.
yaml_file = "recommenders/models/deeprec/config/lightgcn.yaml"
# Disabled paths, not used by the visible cells:
# user_file = "tests/resources/deeprec/lightgcn/user_embeddings.csv"
# item_file = "tests/resources/deeprec/lightgcn/item_embeddings.csv"

# Load and Split Data

## MovieLens Dataset

In [None]:
# Load the MovieLens ratings at the size selected by MOVIELENS_DATA_SIZE.
df = movielens.load_pandas_df(size=MOVIELENS_DATA_SIZE)

100%|██████████| 4.81k/4.81k [00:00<00:00, 25.2kKB/s]


In [None]:
# Preview the first rows to check the loaded columns.
df.head()

Unnamed: 0,userID,itemID,rating,timestamp
0,196,242,3.0,881250949
1,186,302,3.0,891717742
2,22,377,1.0,878887116
3,244,51,2.0,880606923
4,166,346,1.0,886397596


In [None]:
# Stratified train/test split with ratio=0.75 (75% of the data goes to train).
# NOTE(review): SEED is not forwarded here, so the split relies on the
# splitter's own default seeding rather than the notebook-level SEED. This
# presumably keeps the split fixed when SEED is varied for the models —
# confirm that this is intended.
train, test = python_stratified_split(df, ratio=0.75)

In [None]:
# Wrap the train/test splits in the ImplicitCF data object that is handed to
# every LightGCN instance below.
data = ImplicitCF(train=train, test=test, seed=SEED)

# Prepare Hyper-parameters

In [None]:
# Build the hyper-parameter object from the LightGCN YAML config plus the
# notebook-level settings passed as keyword arguments (presumably these
# override the corresponding YAML values — confirm in prepare_hparams).
hparams = prepare_hparams(
    yaml_file,
    # training schedule
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=0.005,
    # model
    n_layers=3,
    # in-training evaluation (the training logs show metrics every 5th epoch)
    eval_epoch=5,
    top_k=TOP_K,
)

# Baseline Model

In [None]:
# Baseline run: build LightGCN from the shared hparams/data/seed and train it
# with fit()'s default arguments (no neg_mode / neg_size, unlike the
# hard-negative runs later in the notebook).
model_movie_base = LightGCN(hparams, data, seed=SEED)

# The timer wraps the whole fit() call, so the reported interval also includes
# the periodic evaluation passes that appear in the training log.
with Timer() as train_time:
    model_movie_base.fit()

print("Took {} seconds for training.".format(train_time.interval))

Epoch 1 (train)1.6s: train loss = 0.39389 = (mf)0.39349 + (embed)0.00040
Epoch 2 (train)1.0s: train loss = 0.26224 = (mf)0.26147 + (embed)0.00077
Epoch 3 (train)1.1s: train loss = 0.23439 = (mf)0.23337 + (embed)0.00102
Epoch 4 (train)1.1s: train loss = 0.22269 = (mf)0.22147 + (embed)0.00122
Epoch 5 (train)1.1s + (eval)0.3s: train loss = 0.20735 = (mf)0.20595 + (embed)0.00140, recall = 0.16499, ndcg = 0.36466, precision = 0.31962, map = 0.09816
Epoch 6 (train)1.2s: train loss = 0.19055 = (mf)0.18892 + (embed)0.00163
Epoch 7 (train)1.4s: train loss = 0.18132 = (mf)0.17946 + (embed)0.00186
Epoch 8 (train)1.2s: train loss = 0.17374 = (mf)0.17166 + (embed)0.00208
Epoch 9 (train)1.4s: train loss = 0.16656 = (mf)0.16431 + (embed)0.00225
Epoch 10 (train)1.5s + (eval)0.3s: train loss = 0.16513 = (mf)0.16272 + (embed)0.00241, recall = 0.18248, ndcg = 0.39565, precision = 0.34422, map = 0.11356
Epoch 11 (train)1.5s: train loss = 0.16183 = (mf)0.15930 + (embed)0.00254
Epoch 12 (train)1.3s: train l

In [None]:
# Top-K recommendations for the users in `test` from the baseline model.
# remove_seen=True presumably drops items each user already has in the
# training data — confirm in recommend_k_items.
topk_scores = model_movie_base.recommend_k_items(
    test,
    top_k=TOP_K,
    remove_seen=True,
)

# Ranking metrics at cutoff TOP_K, each computed from `test` and the
# recommendations above.
eval_map = map_at_k(test, topk_scores, k=TOP_K)
eval_ndcg = ndcg_at_k(test, topk_scores, k=TOP_K)
eval_precision = precision_at_k(test, topk_scores, k=TOP_K)
eval_recall = recall_at_k(test, topk_scores, k=TOP_K)

# One "label:<TAB>value" line per metric.
print(
    "\n".join(
        (
            "MAP:\t%f" % eval_map,
            "NDCG:\t%f" % eval_ndcg,
            "Precision@K:\t%f" % eval_precision,
            "Recall@K:\t%f" % eval_recall,
        )
    )
)

MAP:	0.136154
NDCG:	0.450936
Precision@K:	0.391516
Recall@K:	0.211452


# Hard Negative Sampling (Epoch)

## Round 1

In [None]:
# Fresh LightGCN instance for the hard-negative experiment, built from the same
# hparams, data and seed as the baseline; only the fit() arguments differ.
model_movie_hard = LightGCN(hparams, data, seed=SEED)

Already create adjacency matrix.
Already normalize adjacency matrix.
Using xavier initialization.


In [None]:
# Train with the hard-negative options and time the whole fit() call.
# NOTE(review): the meaning of neg_mode="epoch" and neg_size=100 is defined in
# the project's LightGCN.fit, which is not visible here — confirm there.
# In the recorded log, epoch 2 takes ~138 s against ~1-2 s for the other
# epochs, presumably the cost of the hard-negative step — verify.
with Timer() as train_time:
    model_movie_hard.fit(neg_mode="epoch", neg_size=100)

print("Took {} seconds for training.".format(train_time.interval))

Epoch 1 (train)1.5s: train loss = 0.38765 = (mf)0.38723 + (embed)0.00042
Epoch 2 (train)138.5s: train loss = 0.24316 = (mf)0.24210 + (embed)0.00107
Epoch 3 (train)1.2s: train loss = 0.22033 = (mf)0.21882 + (embed)0.00151
Epoch 4 (train)1.2s: train loss = 0.20635 = (mf)0.20459 + (embed)0.00176
Epoch 5 (train)1.2s + (eval)0.2s: train loss = 0.19318 = (mf)0.19105 + (embed)0.00213, recall = 0.16151, ndcg = 0.36011, precision = 0.31262, map = 0.09734
Epoch 6 (train)1.2s: train loss = 0.17626 = (mf)0.17385 + (embed)0.00241
Epoch 7 (train)1.2s: train loss = 0.16343 = (mf)0.16070 + (embed)0.00273
Epoch 8 (train)1.2s: train loss = 0.15496 = (mf)0.15187 + (embed)0.00309
Epoch 9 (train)1.2s: train loss = 0.15278 = (mf)0.14939 + (embed)0.00338
Epoch 10 (train)1.5s + (eval)0.4s: train loss = 0.14659 = (mf)0.14304 + (embed)0.00355, recall = 0.18391, ndcg = 0.40036, precision = 0.34783, map = 0.11386
Epoch 11 (train)1.7s: train loss = 0.14637 = (mf)0.14264 + (embed)0.00373
Epoch 12 (train)1.7s: train

In [None]:
# Top-K recommendations for the users in `test` from the hard-negative model
# (same call and arguments as used for the baseline).
topk_scores = model_movie_hard.recommend_k_items(
    test,
    top_k=TOP_K,
    remove_seen=True,
)

# Show the first rows of the recommendation table.
topk_scores.head()

Unnamed: 0,userID,itemID,prediction
0,1,475,5.941504
1,1,70,5.544583
2,1,181,5.371619
3,1,98,5.317893
4,1,121,5.29036


In [None]:
# Ranking metrics at cutoff TOP_K for the current `topk_scores`
# (hard-negative model, round 1).
eval_map = map_at_k(test, topk_scores, k=TOP_K)
eval_ndcg = ndcg_at_k(test, topk_scores, k=TOP_K)
eval_precision = precision_at_k(test, topk_scores, k=TOP_K)
eval_recall = recall_at_k(test, topk_scores, k=TOP_K)

# One "label:<TAB>value" line per metric.
print(
    "\n".join(
        (
            "MAP:\t%f" % eval_map,
            "NDCG:\t%f" % eval_ndcg,
            "Precision@K:\t%f" % eval_precision,
            "Recall@K:\t%f" % eval_recall,
        )
    )
)

MAP:	0.136380
NDCG:	0.453612
Precision@K:	0.395122
Recall@K:	0.211966


## Round 2

In [None]:
# Round 2: repeat of round 1 with identical settings (same hparams, data, SEED
# and fit() arguments). Rebinding model_movie_hard discards the round-1 model.
# NOTE(review): the recorded metrics differ slightly between rounds even though
# the seed is the same, so training is evidently not fully deterministic.
model_movie_hard = LightGCN(hparams, data, seed=SEED)

# The timer covers the whole fit() call.
with Timer() as train_time:
    model_movie_hard.fit(neg_mode="epoch", neg_size=100)

print("Took {} seconds for training.".format(train_time.interval))

Already create adjacency matrix.
Already normalize adjacency matrix.
Using xavier initialization.
Epoch 1 (train)2.1s: train loss = 0.39771 = (mf)0.39732 + (embed)0.00040
Epoch 2 (train)136.6s: train loss = 0.25853 = (mf)0.25757 + (embed)0.00097
Epoch 3 (train)1.1s: train loss = 0.22711 = (mf)0.22578 + (embed)0.00133
Epoch 4 (train)1.2s: train loss = 0.20861 = (mf)0.20690 + (embed)0.00171
Epoch 5 (train)1.2s + (eval)0.2s: train loss = 0.19496 = (mf)0.19300 + (embed)0.00196, recall = 0.16314, ndcg = 0.36061, precision = 0.31516, map = 0.09468
Epoch 6 (train)1.2s: train loss = 0.18223 = (mf)0.17992 + (embed)0.00231
Epoch 7 (train)1.2s: train loss = 0.16578 = (mf)0.16324 + (embed)0.00254
Epoch 8 (train)1.1s: train loss = 0.15709 = (mf)0.15417 + (embed)0.00291
Epoch 9 (train)1.5s: train loss = 0.15480 = (mf)0.15164 + (embed)0.00315
Epoch 10 (train)1.7s + (eval)0.3s: train loss = 0.14998 = (mf)0.14639 + (embed)0.00358, recall = 0.18518, ndcg = 0.39683, precision = 0.34549, map = 0.11450
Epo

In [None]:
# Round 2 evaluation: top-K recommendations for the users in `test` from the
# freshly trained hard-negative model.
topk_scores = model_movie_hard.recommend_k_items(
    test,
    top_k=TOP_K,
    remove_seen=True,
)

# Ranking metrics at cutoff TOP_K.
eval_map = map_at_k(test, topk_scores, k=TOP_K)
eval_ndcg = ndcg_at_k(test, topk_scores, k=TOP_K)
eval_precision = precision_at_k(test, topk_scores, k=TOP_K)
eval_recall = recall_at_k(test, topk_scores, k=TOP_K)

# One "label:<TAB>value" line per metric.
print(
    "\n".join(
        (
            "MAP:\t%f" % eval_map,
            "NDCG:\t%f" % eval_ndcg,
            "Precision@K:\t%f" % eval_precision,
            "Recall@K:\t%f" % eval_recall,
        )
    )
)

MAP:	0.135641
NDCG:	0.452083
Precision@K:	0.393531
Recall@K:	0.212037


## Round 3

In [None]:
# Round 3: another repeat with identical settings (same hparams, data, SEED and
# fit() arguments). Rebinding model_movie_hard discards the previous model.
model_movie_hard = LightGCN(hparams, data, seed=SEED)

# The timer covers the whole fit() call.
with Timer() as train_time:
    model_movie_hard.fit(neg_mode="epoch", neg_size=100)

print("Took {} seconds for training.".format(train_time.interval))

Already create adjacency matrix.
Already normalize adjacency matrix.
Using xavier initialization.
Epoch 1 (train)1.6s: train loss = 0.39201 = (mf)0.39160 + (embed)0.00041
Epoch 2 (train)143.7s: train loss = 0.24604 = (mf)0.24500 + (embed)0.00104
Epoch 3 (train)1.2s: train loss = 0.22015 = (mf)0.21872 + (embed)0.00143
Epoch 4 (train)1.2s: train loss = 0.20243 = (mf)0.20071 + (embed)0.00171
Epoch 5 (train)1.2s + (eval)0.2s: train loss = 0.18895 = (mf)0.18687 + (embed)0.00208, recall = 0.16833, ndcg = 0.37584, precision = 0.32598, map = 0.10124
Epoch 6 (train)1.2s: train loss = 0.17411 = (mf)0.17177 + (embed)0.00234
Epoch 7 (train)1.6s: train loss = 0.16754 = (mf)0.16491 + (embed)0.00263
Epoch 8 (train)1.6s: train loss = 0.16081 = (mf)0.15798 + (embed)0.00283
Epoch 9 (train)1.6s: train loss = 0.15673 = (mf)0.15364 + (embed)0.00309
Epoch 10 (train)1.4s + (eval)0.2s: train loss = 0.14908 = (mf)0.14576 + (embed)0.00332, recall = 0.18466, ndcg = 0.39866, precision = 0.34528, map = 0.11360
Epo

In [None]:
# Round 3 evaluation: top-K recommendations for the users in `test` from the
# freshly trained hard-negative model.
topk_scores = model_movie_hard.recommend_k_items(
    test,
    top_k=TOP_K,
    remove_seen=True,
)

# Ranking metrics at cutoff TOP_K.
eval_map = map_at_k(test, topk_scores, k=TOP_K)
eval_ndcg = ndcg_at_k(test, topk_scores, k=TOP_K)
eval_precision = precision_at_k(test, topk_scores, k=TOP_K)
eval_recall = recall_at_k(test, topk_scores, k=TOP_K)

# One "label:<TAB>value" line per metric.
print(
    "\n".join(
        (
            "MAP:\t%f" % eval_map,
            "NDCG:\t%f" % eval_ndcg,
            "Precision@K:\t%f" % eval_precision,
            "Recall@K:\t%f" % eval_recall,
        )
    )
)

MAP:	0.134835
NDCG:	0.450118
Precision@K:	0.394698
Recall@K:	0.212223
