# Global Settings and Imports

In [None]:
# Colab-only setup: mount Google Drive and switch the working directory to the
# project folder, so that relative paths used later in the notebook (such as the
# LightGCN YAML config path) resolve against it.
# NOTE(review): the `recommenders` package imported below is presumably a local copy
# living in this folder — confirm.
import os
from google.colab import drive

# force_remount=True asks Colab to redo the mount instead of reusing an existing one.
drive.mount('/content/gdrive', force_remount=True)
os.chdir('/content/gdrive/MyDrive/Colab Notebooks/LightGCN')

Mounted at /content/gdrive


In [None]:
!pip install scrapbook
!pip install retrying
!pip install pandera

Collecting scrapbook
  Downloading scrapbook-0.5.0-py3-none-any.whl (34 kB)
Collecting papermill (from scrapbook)
  Downloading papermill-2.5.0-py3-none-any.whl (38 kB)
Collecting jedi>=0.16 (from ipython->scrapbook)
  Downloading jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: jedi, papermill, scrapbook
Successfully installed jedi-0.19.1 papermill-2.5.0 scrapbook-0.5.0
Collecting retrying
  Downloading retrying-1.3.4-py3-none-any.whl (11 kB)
Installing collected packages: retrying
Successfully installed retrying-1.3.4
Collecting pandera
  Downloading pandera-0.18.0-py3-none-any.whl (209 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.0/209.0 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting multimethod (from pandera)
  Downloading multimethod-1.10-py3-none-any.whl (9.9 kB)
Collecting typeguard>=3.0.2 (from 

In [None]:
import sys
import scrapbook as sb
import pandas as pd
import numpy as np
import tensorflow as tf
tf.get_logger().setLevel('ERROR') # only show error messages

from recommenders.utils.timer import Timer
from recommenders.models.deeprec.models.graphrec.lightgcn import LightGCN
from recommenders.models.deeprec.DataModel.ImplicitCF import ImplicitCF
from recommenders.datasets import movielens
from recommenders.datasets.python_splitters import python_stratified_split
from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k
from recommenders.utils.constants import SEED as DEFAULT_SEED
from recommenders.models.deeprec.deeprec_utils import prepare_hparams

# Report the interpreter and key library versions so recorded results can be
# tied to a concrete environment.
version_report = (
    ("System version: {}", sys.version),
    ("Pandas version: {}", pd.__version__),
    ("Tensorflow version: {}", tf.__version__),
)
for version_template, version_value in version_report:
    print(version_template.format(version_value))

System version: 3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]
Pandas version: 1.5.3
Tensorflow version: 2.14.0


In [None]:
# Number of items recommended per user; also used as the cutoff k of the ranking metrics
TOP_K = 10

# Select MovieLens data size: 100k, 1m, 10m, or 20m
MOVIELENS_DATA_SIZE = '100k'
# NOTE(review): DATA_SIZE is not referenced in the cells visible here and its value is
# unexplained — presumably a row count for another dataset; confirm or remove.
DATA_SIZE = 1584082

# Model parameters (forwarded to prepare_hparams below)
EPOCHS = 30
BATCH_SIZE = 512

SEED = DEFAULT_SEED  # Set None for non-deterministic results

# Relative path: resolved against the working directory chosen by os.chdir in the Colab setup cell
yaml_file = "recommenders/models/deeprec/config/lightgcn.yaml"
# user_file = "tests/resources/deeprec/lightgcn/user_embeddings.csv"
# item_file = "tests/resources/deeprec/lightgcn/item_embeddings.csv"

# Load and Split Data

## MovieLens Dataset

In [None]:
# Load the MovieLens ratings of the configured size into a pandas DataFrame
df = movielens.load_pandas_df(size=MOVIELENS_DATA_SIZE)

100%|██████████| 4.81k/4.81k [00:00<00:00, 5.21kKB/s]


In [None]:
# Preview the first rows; the expected columns are userID, itemID, rating, timestamp
df.head()

Unnamed: 0,userID,itemID,rating,timestamp
0,196,242,3.0,881250949
1,186,302,3.0,891717742
2,22,377,1.0,878887116
3,244,51,2.0,880606923
4,166,346,1.0,886397596


In [None]:
# Stratified split with 75% of the ratings going to train and the rest to test.
# The seed is passed explicitly so the split follows the notebook-wide SEED setting
# (like ImplicitCF and LightGCN below) instead of silently relying on the splitter's
# built-in default.
train, test = python_stratified_split(df, ratio=0.75, seed=SEED)

In [None]:
# Wrap the train/test frames in the ImplicitCF container that LightGCN receives as its data argument
data = ImplicitCF(train=train, test=test, seed=SEED)

# Prepare Hyper-parameters

In [None]:
# Values that override whatever the LightGCN YAML config provides.
hparam_overrides = dict(
    n_layers=3,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    learning_rate=0.005,
    eval_epoch=5,  # the training log reports evaluation metrics on every 5th epoch
    top_k=TOP_K,
)
hparams = prepare_hparams(yaml_file, **hparam_overrides)

# Baseline Model

In [None]:
# Baseline: LightGCN trained with fit()'s default arguments (no neg_mode / neg_size),
# serving as the reference for the hard-negative-sampling runs further down.
model_movie_base = LightGCN(hparams, data, seed=SEED)

# Time the whole fit() call; the elapsed seconds are read from train_time.interval afterwards.
with Timer() as train_time:
    model_movie_base.fit()

print("Took {} seconds for training.".format(train_time.interval))

Epoch 1 (train)1.6s: train loss = 0.39389 = (mf)0.39349 + (embed)0.00040
Epoch 2 (train)1.0s: train loss = 0.26224 = (mf)0.26147 + (embed)0.00077
Epoch 3 (train)1.1s: train loss = 0.23439 = (mf)0.23337 + (embed)0.00102
Epoch 4 (train)1.1s: train loss = 0.22269 = (mf)0.22147 + (embed)0.00122
Epoch 5 (train)1.1s + (eval)0.3s: train loss = 0.20735 = (mf)0.20595 + (embed)0.00140, recall = 0.16499, ndcg = 0.36466, precision = 0.31962, map = 0.09816
Epoch 6 (train)1.2s: train loss = 0.19055 = (mf)0.18892 + (embed)0.00163
Epoch 7 (train)1.4s: train loss = 0.18132 = (mf)0.17946 + (embed)0.00186
Epoch 8 (train)1.2s: train loss = 0.17374 = (mf)0.17166 + (embed)0.00208
Epoch 9 (train)1.4s: train loss = 0.16656 = (mf)0.16431 + (embed)0.00225
Epoch 10 (train)1.5s + (eval)0.3s: train loss = 0.16513 = (mf)0.16272 + (embed)0.00241, recall = 0.18248, ndcg = 0.39565, precision = 0.34422, map = 0.11356
Epoch 11 (train)1.5s: train loss = 0.16183 = (mf)0.15930 + (embed)0.00254
Epoch 12 (train)1.3s: train l

In [None]:
# Top-K recommendations per test user from the baseline model, skipping already-seen items.
topk_scores = model_movie_base.recommend_k_items(test, top_k=TOP_K, remove_seen=True)

# All four ranking metrics are computed against the held-out test set at the same cutoff.
eval_map, eval_ndcg, eval_precision, eval_recall = [
    metric_fn(test, topk_scores, k=TOP_K)
    for metric_fn in (map_at_k, ndcg_at_k, precision_at_k, recall_at_k)
]

# One "label<TAB>value" line per metric.
report_rows = (
    ("MAP:\t%f", eval_map),
    ("NDCG:\t%f", eval_ndcg),
    ("Precision@K:\t%f", eval_precision),
    ("Recall@K:\t%f", eval_recall),
)
print("\n".join(row_format % metric_value for row_format, metric_value in report_rows))

MAP:	0.136154
NDCG:	0.450936
Precision@K:	0.391516
Recall@K:	0.211452


# Hard Negative Sampling (Batch)

## Round 1

In [None]:
# Fresh model for the hard-negative experiment, built from the same hparams, data and seed as the baseline
model_movie_hard = LightGCN(hparams, data, seed=SEED)

Already create adjacency matrix.
Already normalize adjacency matrix.
Using xavier initialization.


In [None]:
# Same timed training as the baseline, but with hard negative sampling switched on.
# NOTE(review): neg_size=10 is presumably the number of negative candidates drawn per
# positive — confirm against the local LightGCN.fit implementation.
with Timer() as train_time:
    model_movie_hard.fit(neg_mode="hard", neg_size=10)

print("Took {} seconds for training.".format(train_time.interval))

Epoch 1 (train)5.5s: train loss = 0.40833 = (mf)0.40792 + (embed)0.00040
Epoch 2 (train)2.7s: train loss = 0.27106 = (mf)0.27021 + (embed)0.00084
Epoch 3 (train)2.5s: train loss = 0.24996 = (mf)0.24887 + (embed)0.00109
Epoch 4 (train)2.0s: train loss = 0.23369 = (mf)0.23238 + (embed)0.00130
Epoch 5 (train)1.7s + (eval)0.2s: train loss = 0.21646 = (mf)0.21494 + (embed)0.00152, recall = 0.17201, ndcg = 0.37471, precision = 0.32587, map = 0.10313
Epoch 6 (train)1.9s: train loss = 0.20597 = (mf)0.20422 + (embed)0.00175
Epoch 7 (train)1.7s: train loss = 0.19467 = (mf)0.19270 + (embed)0.00197
Epoch 8 (train)1.7s: train loss = 0.18876 = (mf)0.18659 + (embed)0.00217
Epoch 9 (train)2.0s: train loss = 0.18312 = (mf)0.18077 + (embed)0.00235
Epoch 10 (train)2.4s + (eval)0.4s: train loss = 0.17963 = (mf)0.17712 + (embed)0.00251, recall = 0.18415, ndcg = 0.39471, precision = 0.34475, map = 0.11200
Epoch 11 (train)2.5s: train loss = 0.17428 = (mf)0.17162 + (embed)0.00266
Epoch 12 (train)1.8s: train l

In [None]:
# Top-K recommendations per test user from the hard-negative model, skipping already-seen items.
# This rebinds topk_scores, replacing the baseline recommendations.
topk_scores = model_movie_hard.recommend_k_items(test, top_k=TOP_K, remove_seen=True)

# Preview the result: one row per (userID, itemID) with its prediction score
topk_scores.head()

Unnamed: 0,userID,itemID,prediction
0,1,181,5.641906
1,1,174,5.574146
2,1,475,4.925279
3,1,95,4.876225
4,1,22,4.876173


In [None]:
# Score the current topk_scores against the held-out test set; every metric uses the same cutoff.
eval_map, eval_ndcg, eval_precision, eval_recall = [
    metric_fn(test, topk_scores, k=TOP_K)
    for metric_fn in (map_at_k, ndcg_at_k, precision_at_k, recall_at_k)
]

# One "label<TAB>value" line per metric.
report_rows = (
    ("MAP:\t%f", eval_map),
    ("NDCG:\t%f", eval_ndcg),
    ("Precision@K:\t%f", eval_precision),
    ("Recall@K:\t%f", eval_recall),
)
print("\n".join(row_format % metric_value for row_format, metric_value in report_rows))

MAP:	0.139229
NDCG:	0.454676
Precision@K:	0.396076
Recall@K:	0.211831


## Round 2

In [None]:
# Round 2: rebuild and retrain the hard-negative model from scratch with identical settings.
# This rebinds model_movie_hard, so the Round 1 model is no longer reachable afterwards.
# NOTE(review): every round passes the same SEED, yet the recorded metrics differ from
# round to round, so some randomness (presumably the hard-negative sampling) is not
# controlled by this seed — confirm.
model_movie_hard = LightGCN(hparams, data, seed=SEED)

with Timer() as train_time:
    model_movie_hard.fit(neg_mode="hard", neg_size=10)

print("Took {} seconds for training.".format(train_time.interval))

Already create adjacency matrix.
Already normalize adjacency matrix.
Using xavier initialization.
Epoch 1 (train)2.6s: train loss = 0.40600 = (mf)0.40559 + (embed)0.00041
Epoch 2 (train)1.8s: train loss = 0.27268 = (mf)0.27183 + (embed)0.00085
Epoch 3 (train)1.8s: train loss = 0.25265 = (mf)0.25156 + (embed)0.00109
Epoch 4 (train)1.8s: train loss = 0.23157 = (mf)0.23028 + (embed)0.00129
Epoch 5 (train)1.8s + (eval)0.2s: train loss = 0.21923 = (mf)0.21772 + (embed)0.00151, recall = 0.17226, ndcg = 0.38236, precision = 0.33415, map = 0.10513
Epoch 6 (train)2.3s: train loss = 0.20382 = (mf)0.20208 + (embed)0.00174
Epoch 7 (train)2.6s: train loss = 0.19614 = (mf)0.19419 + (embed)0.00195
Epoch 8 (train)2.2s: train loss = 0.19269 = (mf)0.19056 + (embed)0.00212
Epoch 9 (train)1.8s: train loss = 0.18461 = (mf)0.18232 + (embed)0.00228
Epoch 10 (train)1.8s + (eval)0.2s: train loss = 0.18133 = (mf)0.17889 + (embed)0.00243, recall = 0.18188, ndcg = 0.39019, precision = 0.34115, map = 0.11074
Epoch

In [None]:
# Top-K recommendations per test user from the Round 2 model, skipping already-seen items.
topk_scores = model_movie_hard.recommend_k_items(test, top_k=TOP_K, remove_seen=True)

# All four ranking metrics are computed against the held-out test set at the same cutoff.
eval_map, eval_ndcg, eval_precision, eval_recall = [
    metric_fn(test, topk_scores, k=TOP_K)
    for metric_fn in (map_at_k, ndcg_at_k, precision_at_k, recall_at_k)
]

# One "label<TAB>value" line per metric.
report_rows = (
    ("MAP:\t%f", eval_map),
    ("NDCG:\t%f", eval_ndcg),
    ("Precision@K:\t%f", eval_precision),
    ("Recall@K:\t%f", eval_recall),
)
print("\n".join(row_format % metric_value for row_format, metric_value in report_rows))

MAP:	0.138731
NDCG:	0.452987
Precision@K:	0.394910
Recall@K:	0.216197


## Round 3

In [None]:
# Round 3: another from-scratch run of the hard-negative model with identical settings.
# This rebinds model_movie_hard, replacing the model from the previous round.
# NOTE(review): the seed is the same as in the earlier rounds although their recorded
# metrics differ, so run-to-run variation presumably comes from randomness that
# `seed` does not control — confirm.
model_movie_hard = LightGCN(hparams, data, seed=SEED)

with Timer() as train_time:
    model_movie_hard.fit(neg_mode="hard", neg_size=10)

print("Took {} seconds for training.".format(train_time.interval))

Already create adjacency matrix.
Already normalize adjacency matrix.
Using xavier initialization.
Epoch 1 (train)2.8s: train loss = 0.40876 = (mf)0.40836 + (embed)0.00040
Epoch 2 (train)1.7s: train loss = 0.27571 = (mf)0.27489 + (embed)0.00082
Epoch 3 (train)1.7s: train loss = 0.25726 = (mf)0.25621 + (embed)0.00105
Epoch 4 (train)1.7s: train loss = 0.24106 = (mf)0.23982 + (embed)0.00124
Epoch 5 (train)1.7s + (eval)0.2s: train loss = 0.22195 = (mf)0.22048 + (embed)0.00147, recall = 0.16671, ndcg = 0.36339, precision = 0.32068, map = 0.09855
Epoch 6 (train)1.7s: train loss = 0.20780 = (mf)0.20608 + (embed)0.00172
Epoch 7 (train)2.3s: train loss = 0.19541 = (mf)0.19344 + (embed)0.00197
Epoch 8 (train)2.5s: train loss = 0.18970 = (mf)0.18752 + (embed)0.00217
Epoch 9 (train)2.1s: train loss = 0.18462 = (mf)0.18227 + (embed)0.00234
Epoch 10 (train)1.8s + (eval)0.2s: train loss = 0.18178 = (mf)0.17929 + (embed)0.00249, recall = 0.18722, ndcg = 0.39982, precision = 0.34825, map = 0.11489
Epoch

In [None]:
# Top-K recommendations per test user from the Round 3 model, skipping already-seen items.
topk_scores = model_movie_hard.recommend_k_items(test, top_k=TOP_K, remove_seen=True)

# All four ranking metrics are computed against the held-out test set at the same cutoff.
eval_map, eval_ndcg, eval_precision, eval_recall = [
    metric_fn(test, topk_scores, k=TOP_K)
    for metric_fn in (map_at_k, ndcg_at_k, precision_at_k, recall_at_k)
]

# One "label<TAB>value" line per metric.
report_rows = (
    ("MAP:\t%f", eval_map),
    ("NDCG:\t%f", eval_ndcg),
    ("Precision@K:\t%f", eval_precision),
    ("Recall@K:\t%f", eval_recall),
)
print("\n".join(row_format % metric_value for row_format, metric_value in report_rows))

MAP:	0.135180
NDCG:	0.450235
Precision@K:	0.396607
Recall@K:	0.213584
