# Version updates

In [1]:
# Version tag for this notebook; the changes made in each version are listed in the notes below.
__version__ = 'v0.0.1'

1. Initial experiments

# Main code

## Setup

### Install other components

!pip install --quiet cornac==1.5.2 adjustText

### Import Libraries

In [20]:
import os
import sys
import itertools

import datetime
import scipy.sparse as sp
import numpy as np
import pandas as pd
#import seaborn as sns
import matplotlib.pyplot as plt
import zipfile
from adjustText import adjust_text
from collections import defaultdict
from time import sleep
from tqdm.notebook import tqdm

%matplotlib inline

import cornac
from cornac.utils import cache
from cornac.datasets import movielens
from cornac.eval_methods import RatioSplit, CrossValidation
from cornac.models import MF, NMF, WMF, BPR, BaselineOnly, NeuMF, CDL
from cornac.hyperopt import Discrete, Continuous
from cornac.hyperopt import GridSearch, RandomSearch

# Notebook-wide verbosity switch, passed as `verbose=` to the split and the models below.
VERBOSE = True

# Record the interpreter and Cornac versions alongside the results.
print(f"System version: {sys.version}")
print(f"Cornac version: {cornac.__version__}")

System version: 3.7.13 (default, May 11 2022, 08:49:57) 
[GCC 10.2.1 20210110]
Cornac version: 1.14.2


### Use of SEED
Decide whether to use a fixed random seed for repeatability. The drawback is that training takes longer, because a seeded run is single-threaded.

Set this to False during initial experimentation, then switch it to True later, once consistent, repeatable results are needed.

In [2]:
# Reproducibility toggle for the whole notebook.
USE_SEED = True  # True = repeatable, no parallelisation

# Seed value handed to the data split and the models; None when seeding is switched off.
SEED = 42 if USE_SEED else None

### Load data

In [3]:
# Load the ratings table from a CSV file; the path is relative to the notebook's working directory.
df = pd.read_csv('../../data/train_ratings_seen.csv')

In [4]:
# Preview the first few rows of the loaded ratings.
df.head()

Unnamed: 0,user_id,item_id,rating
0,34297304,1260259,4.0
1,34297304,1137544,3.0
2,34297304,1372673,1.0
3,34297304,2511537,3.0
4,34297304,92537,4.0


In [5]:
# (number of rows, number of columns) of the ratings table.
df.shape

(119922, 3)

In [6]:
# Build a Cornac Dataset from the rows of df, read in column order as
# (user, item, rating) triplets; index=False keeps the DataFrame index out of the tuples.
dataset = cornac.data.Dataset.from_uir(df.itertuples(index=False))

### Display some facts of what we have

In [7]:
%%time

# Display some parameters
n_users = df['user_id'].nunique()
n_items = df['item_id'].nunique()

print(f'Number of users: {n_users:,}')
print(f'Number of items: {n_items:,}')
print(f'Shape of full matrix: {cornac.data.Dataset.from_uir(df.itertuples(index=False)).matrix.shape}')

# Sparsity
total_possible_rankings = n_users * n_items
sparsity = 1 - df.shape[0]/total_possible_rankings
print(f'Sparsity of matrix: {100*sparsity:0.3f}%')
print()

Number of users: 5,066
Number of items: 39,520
Shape of full matrix: (5066, 39520)
Sparsity of matrix: 99.940%

CPU times: user 382 ms, sys: 229 µs, total: 382 ms
Wall time: 390 ms


## Run experiments on some of the potential candidate models

In [10]:
%%time
K = 50
n_iterations = 1000

# Baseline models
baseline = BaselineOnly()

svd = cornac.models.SVD(
    k=K,
    max_iter = n_iterations,
    verbose=VERBOSE,
    seed=SEED,
    name=f'SVD(K={K})'
)

mostpop = cornac.models.MostPop(
    name = f'MostPop'
)

mf = cornac.models.MF(
    k=K, 
    max_iter=n_iterations,
    verbose=VERBOSE,
    seed=SEED,
    name=f'MF(K={K})'
)

# Collaborative
bpr = cornac.models.BPR(
    k=K, 
    max_iter=n_iterations,
    verbose=VERBOSE, 
    seed=SEED, 
    name=f'BPR(K={K})'
)

wmf = cornac.models.WMF(
    k=K, 
    max_iter=n_iterations,
    verbose=VERBOSE, 
    seed=SEED, 
    name=f'WMF(K={K})'
)

# With Modality
# TODO

rs = RatioSplit(list(df.itertuples(index=False, name=None)), test_size=0.2, seed=SEED, verbose=VERBOSE)

# Define models to try
models = [
    baseline,
    svd,
    mostpop,
    mf,
    bpr,
    wmf,
]

# Define metrics
metrics = [
    cornac.metrics.RMSE(),
    cornac.metrics.FMeasure(k=20),
    cornac.metrics.AUC(),
    cornac.metrics.MRR(),
    cornac.metrics.NCRR(k=20),
    cornac.metrics.NDCG(k=20),
    cornac.metrics.Recall(k=20)
]

CPU times: user 365 ms, sys: 6.95 ms, total: 372 ms
Wall time: 371 ms


In [11]:
%%time
model_experiment = cornac.Experiment(
    eval_method=rs, 
    models=models, 
    metrics=metrics,
    user_based=True,
    save_dir='cornac_experiments',
)

model_experiment.run()


TEST:
...
             |   RMSE |    AUC |  F1@20 |    MRR | NCRR@20 | NDCG@20 | Recall@20 | Train (s) | Test (s)
------------ + ------ + ------ + ------ + ------ + ------- + ------- + --------- + --------- + --------
BaselineOnly | 0.5170 | 0.5750 | 0.0024 | 0.0078 |  0.0021 |  0.0034 |    0.0066 |    0.0309 |  16.4576
SVD(K=50)    | 0.5489 | 0.5293 | 0.0001 | 0.0005 |  0.0000 |  0.0001 |    0.0001 |    4.9027 |  18.9999
MostPop      | 1.5064 | 0.7736 | 0.0103 | 0.0318 |  0.0122 |  0.0178 |    0.0319 |    0.0038 |   8.5064
MF(K=50)     | 0.5489 | 0.5293 | 0.0001 | 0.0005 |  0.0000 |  0.0001 |    0.0001 |    4.9219 |  18.8774
BPR(K=50)    | 2.5031 | 0.8304 | 0.0103 | 0.0319 |  0.0123 |  0.0179 |    0.0322 |   16.5034 |  18.9165
WMF(K=50)    | 2.8696 | 0.6625 | 0.0069 | 0.0211 |  0.0075 |  0.0115 |    0.0213 | 2496.0660 |  23.7914

CPU times: user 7h 56min 22s, sys: 30min 27s, total: 8h 26min 50s
Wall time: 43min 48s


In [12]:
# Vary K: sweep the number of latent factors for WMF

In [21]:
%%time
n_iterations = 1000

rs = RatioSplit(list(df.itertuples(index=False, name=None)), test_size=0.2, seed=SEED, verbose=VERBOSE)

models = []
for K in range(10, 310, 10):
    curr_wmf = cornac.models.WMF(
        k=K, 
        max_iter=n_iterations,
        verbose=VERBOSE, 
        seed=SEED, 
        name=f'WMF(K={K})'
    )
    models.append(curr_wmf)
    
# Define metrics
metrics = [
    cornac.metrics.RMSE(),
    cornac.metrics.FMeasure(k=20),
    cornac.metrics.AUC(),
    cornac.metrics.MRR(),
    cornac.metrics.NCRR(k=20),
    cornac.metrics.NDCG(k=20),
    cornac.metrics.Recall(k=20)
]

rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 5066
Number of items = 34404
Number of ratings = 95937
Max rating = 5.0
Min rating = 1.0
Global mean = 3.9
---
Test data:
Number of users = 4462
Number of items = 8487
Number of ratings = 18588
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 5066
Total items = 34404
CPU times: user 367 ms, sys: 3.96 ms, total: 371 ms
Wall time: 369 ms


In [22]:
%%time
model_experiment = cornac.Experiment(
    eval_method=rs, 
    models=models, 
    metrics=metrics,
    user_based=True,
    save_dir='cornac_experiments',
)

model_experiment.run()


[WMF(K=10)] Training started!


100%|██████████| 1000/1000 [33:13<00:00,  1.99s/it, loss=41] 


Learning completed!

[WMF(K=10)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 59692.60it/s]
Ranking: 100%|██████████| 4462/4462 [00:22<00:00, 198.09it/s]


WMF(K=10) model is saved to cornac_experiments/WMF(K=10)/2022-06-12_04-11-11-229174.pkl

[WMF(K=20)] Training started!


100%|██████████| 1000/1000 [34:59<00:00,  2.10s/it, loss=38.3]


Learning completed!

[WMF(K=20)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 50486.37it/s]
Ranking: 100%|██████████| 4462/4462 [00:22<00:00, 196.77it/s]


WMF(K=20) model is saved to cornac_experiments/WMF(K=20)/2022-06-12_04-46-34-871312.pkl

[WMF(K=30)] Training started!


100%|██████████| 1000/1000 [37:31<00:00,  2.25s/it, loss=35.3]


Learning completed!

[WMF(K=30)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 58591.15it/s]
Ranking: 100%|██████████| 4462/4462 [00:22<00:00, 196.17it/s]


WMF(K=30) model is saved to cornac_experiments/WMF(K=30)/2022-06-12_05-24-30-651148.pkl

[WMF(K=40)] Training started!


100%|██████████| 1000/1000 [39:26<00:00,  2.37s/it, loss=31.5]


Learning completed!

[WMF(K=40)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 60323.83it/s]
Ranking: 100%|██████████| 4462/4462 [00:22<00:00, 196.09it/s]


WMF(K=40) model is saved to cornac_experiments/WMF(K=40)/2022-06-12_06-04-21-069066.pkl

[WMF(K=50)] Training started!


100%|██████████| 1000/1000 [42:04<00:00,  2.52s/it, loss=14.3]


Learning completed!

[WMF(K=50)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 59697.90it/s]
Ranking: 100%|██████████| 4462/4462 [00:22<00:00, 194.31it/s]


WMF(K=50) model is saved to cornac_experiments/WMF(K=50)/2022-06-12_06-46-49-153312.pkl

[WMF(K=60)] Training started!


100%|██████████| 1000/1000 [45:20<00:00,  2.72s/it, loss=11] 


Learning completed!

[WMF(K=60)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 58736.19it/s]
Ranking: 100%|██████████| 4462/4462 [00:23<00:00, 193.56it/s]


WMF(K=60) model is saved to cornac_experiments/WMF(K=60)/2022-06-12_07-32-33-352846.pkl

[WMF(K=70)] Training started!


100%|██████████| 1000/1000 [47:43<00:00,  2.86s/it, loss=9.97]


Learning completed!

[WMF(K=70)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 58642.75it/s]
Ranking: 100%|██████████| 4462/4462 [00:22<00:00, 194.20it/s]


WMF(K=70) model is saved to cornac_experiments/WMF(K=70)/2022-06-12_08-20-40-639754.pkl

[WMF(K=80)] Training started!


100%|██████████| 1000/1000 [49:03<00:00,  2.94s/it, loss=9.1]


Learning completed!

[WMF(K=80)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 58514.50it/s]
Ranking: 100%|██████████| 4462/4462 [00:22<00:00, 194.40it/s]


WMF(K=80) model is saved to cornac_experiments/WMF(K=80)/2022-06-12_09-10-08-648109.pkl

[WMF(K=90)] Training started!


100%|██████████| 1000/1000 [52:08<00:00,  3.13s/it, loss=8.37]


Learning completed!

[WMF(K=90)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 57627.33it/s]
Ranking: 100%|██████████| 4462/4462 [00:23<00:00, 192.70it/s]


WMF(K=90) model is saved to cornac_experiments/WMF(K=90)/2022-06-12_10-02-41-711585.pkl

[WMF(K=100)] Training started!


100%|██████████| 1000/1000 [55:19<00:00,  3.32s/it, loss=7.76]


Learning completed!

[WMF(K=100)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 58799.14it/s]
Ranking: 100%|██████████| 4462/4462 [00:23<00:00, 192.32it/s]


WMF(K=100) model is saved to cornac_experiments/WMF(K=100)/2022-06-12_10-58-26-030068.pkl

[WMF(K=110)] Training started!


100%|██████████| 1000/1000 [57:08<00:00,  3.43s/it, loss=7.24]


Learning completed!

[WMF(K=110)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 63365.98it/s]
Ranking: 100%|██████████| 4462/4462 [00:23<00:00, 188.75it/s]


WMF(K=110) model is saved to cornac_experiments/WMF(K=110)/2022-06-12_11-55-59-532600.pkl

[WMF(K=120)] Training started!


100%|██████████| 1000/1000 [1:00:07<00:00,  3.61s/it, loss=6.79]


Learning completed!

[WMF(K=120)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 60524.07it/s]
Ranking: 100%|██████████| 4462/4462 [00:23<00:00, 190.29it/s]


WMF(K=120) model is saved to cornac_experiments/WMF(K=120)/2022-06-12_12-56-31-830674.pkl

[WMF(K=130)] Training started!


100%|██████████| 1000/1000 [1:03:19<00:00,  3.80s/it, loss=6.39]


Learning completed!

[WMF(K=130)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 59140.94it/s]
Ranking: 100%|██████████| 4462/4462 [00:23<00:00, 189.53it/s]


WMF(K=130) model is saved to cornac_experiments/WMF(K=130)/2022-06-12_14-00-16-318791.pkl

[WMF(K=140)] Training started!


100%|██████████| 1000/1000 [1:05:58<00:00,  3.96s/it, loss=6.04]


Learning completed!

[WMF(K=140)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 56748.06it/s]
Ranking: 100%|██████████| 4462/4462 [00:23<00:00, 189.09it/s]


WMF(K=140) model is saved to cornac_experiments/WMF(K=140)/2022-06-12_15-06-39-300240.pkl

[WMF(K=150)] Training started!


100%|██████████| 1000/1000 [1:08:22<00:00,  4.10s/it, loss=5.73]


Learning completed!

[WMF(K=150)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 61715.75it/s]
Ranking: 100%|██████████| 4462/4462 [00:23<00:00, 186.63it/s]


WMF(K=150) model is saved to cornac_experiments/WMF(K=150)/2022-06-12_16-15-26-606651.pkl

[WMF(K=160)] Training started!


100%|██████████| 1000/1000 [1:11:22<00:00,  4.28s/it, loss=5.45]


Learning completed!

[WMF(K=160)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 58106.82it/s]
Ranking: 100%|██████████| 4462/4462 [00:23<00:00, 186.71it/s]


WMF(K=160) model is saved to cornac_experiments/WMF(K=160)/2022-06-12_17-27-14-313304.pkl

[WMF(K=170)] Training started!


100%|██████████| 1000/1000 [1:14:50<00:00,  4.49s/it, loss=5.2]


Learning completed!

[WMF(K=170)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 62270.05it/s]
Ranking: 100%|██████████| 4462/4462 [00:24<00:00, 183.81it/s]


WMF(K=170) model is saved to cornac_experiments/WMF(K=170)/2022-06-12_18-42-30-029450.pkl

[WMF(K=180)] Training started!


100%|██████████| 1000/1000 [1:16:21<00:00,  4.58s/it, loss=4.97]


Learning completed!

[WMF(K=180)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 58712.44it/s]
Ranking: 100%|██████████| 4462/4462 [00:24<00:00, 183.63it/s]


WMF(K=180) model is saved to cornac_experiments/WMF(K=180)/2022-06-12_19-59-17-259040.pkl

[WMF(K=190)] Training started!


100%|██████████| 1000/1000 [1:17:45<00:00,  4.67s/it, loss=4.77]


Learning completed!

[WMF(K=190)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 57926.92it/s]
Ranking: 100%|██████████| 4462/4462 [00:24<00:00, 182.48it/s]


WMF(K=190) model is saved to cornac_experiments/WMF(K=190)/2022-06-12_21-17-28-187758.pkl

[WMF(K=200)] Training started!


100%|██████████| 1000/1000 [1:20:36<00:00,  4.84s/it, loss=4.58]


Learning completed!

[WMF(K=200)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 58590.53it/s]
Ranking: 100%|██████████| 4462/4462 [00:24<00:00, 182.22it/s]


WMF(K=200) model is saved to cornac_experiments/WMF(K=200)/2022-06-12_22-38-30-662132.pkl

[WMF(K=210)] Training started!


100%|██████████| 1000/1000 [1:23:00<00:00,  4.98s/it, loss=4.41]


Learning completed!

[WMF(K=210)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 59390.93it/s]
Ranking: 100%|██████████| 4462/4462 [00:24<00:00, 180.71it/s]


WMF(K=210) model is saved to cornac_experiments/WMF(K=210)/2022-06-13_00-01-57-410804.pkl

[WMF(K=220)] Training started!


100%|██████████| 1000/1000 [1:27:01<00:00,  5.22s/it, loss=4.25]


Learning completed!

[WMF(K=220)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 58654.18it/s]
Ranking: 100%|██████████| 4462/4462 [00:24<00:00, 180.76it/s]


WMF(K=220) model is saved to cornac_experiments/WMF(K=220)/2022-06-13_01-29-25-384774.pkl

[WMF(K=230)] Training started!


100%|██████████| 1000/1000 [1:30:27<00:00,  5.43s/it, loss=4.11]


Learning completed!

[WMF(K=230)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 58795.51it/s]
Ranking: 100%|██████████| 4462/4462 [00:24<00:00, 180.28it/s]


WMF(K=230) model is saved to cornac_experiments/WMF(K=230)/2022-06-13_03-00-19-326544.pkl

[WMF(K=240)] Training started!


100%|██████████| 1000/1000 [1:31:39<00:00,  5.50s/it, loss=3.97]


Learning completed!

[WMF(K=240)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 55187.43it/s]
Ranking: 100%|██████████| 4462/4462 [00:25<00:00, 177.31it/s]


WMF(K=240) model is saved to cornac_experiments/WMF(K=240)/2022-06-13_04-32-25-479659.pkl

[WMF(K=250)] Training started!


100%|██████████| 1000/1000 [1:43:55<00:00,  6.24s/it, loss=3.85]


Learning completed!

[WMF(K=250)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 59801.02it/s]
Ranking: 100%|██████████| 4462/4462 [00:25<00:00, 176.80it/s]


WMF(K=250) model is saved to cornac_experiments/WMF(K=250)/2022-06-13_06-16-47-567935.pkl

[WMF(K=260)] Training started!


100%|██████████| 1000/1000 [1:42:41<00:00,  6.16s/it, loss=3.73]


Learning completed!

[WMF(K=260)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 57567.71it/s]
Ranking: 100%|██████████| 4462/4462 [00:25<00:00, 174.09it/s]


WMF(K=260) model is saved to cornac_experiments/WMF(K=260)/2022-06-13_07-59-56-017634.pkl

[WMF(K=270)] Training started!


100%|██████████| 1000/1000 [1:42:36<00:00,  6.16s/it, loss=3.63]


Learning completed!

[WMF(K=270)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 57703.89it/s]
Ranking: 100%|██████████| 4462/4462 [00:25<00:00, 173.49it/s]


WMF(K=270) model is saved to cornac_experiments/WMF(K=270)/2022-06-13_09-42-59-681233.pkl

[WMF(K=280)] Training started!


100%|██████████| 1000/1000 [1:56:11<00:00,  6.97s/it, loss=3.53]


Learning completed!

[WMF(K=280)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 55891.94it/s]
Ranking: 100%|██████████| 4462/4462 [00:25<00:00, 173.40it/s]


WMF(K=280) model is saved to cornac_experiments/WMF(K=280)/2022-06-13_11-39-38-475623.pkl

[WMF(K=290)] Training started!


100%|██████████| 1000/1000 [1:56:25<00:00,  6.99s/it, loss=3.43]


Learning completed!

[WMF(K=290)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 57316.91it/s]
Ranking: 100%|██████████| 4462/4462 [00:25<00:00, 172.87it/s]


WMF(K=290) model is saved to cornac_experiments/WMF(K=290)/2022-06-13_13-36-30-977207.pkl

[WMF(K=300)] Training started!


100%|██████████| 1000/1000 [1:59:56<00:00,  7.20s/it, loss=3.35] 


Learning completed!

[WMF(K=300)] Evaluation started!


Rating: 100%|██████████| 18588/18588 [00:00<00:00, 50843.40it/s]
Ranking: 100%|██████████| 4462/4462 [00:26<00:00, 171.56it/s]

WMF(K=300) model is saved to cornac_experiments/WMF(K=300)/2022-06-13_15-36-54-972409.pkl

TEST:
...
           |   RMSE |    AUC |  F1@20 |    MRR | NCRR@20 | NDCG@20 | Recall@20 | Train (s) | Test (s)
---------- + ------ + ------ + ------ + ------ + ------- + ------- + --------- + --------- + --------
WMF(K=10)  | 3.0016 | 0.6567 | 0.0054 | 0.0178 |  0.0060 |  0.0086 |    0.0145 | 1993.1588 |  23.4081
WMF(K=20)  | 2.9987 | 0.6392 | 0.0056 | 0.0171 |  0.0057 |  0.0085 |    0.0148 | 2099.9651 |  23.6742
WMF(K=30)  | 2.9932 | 0.6280 | 0.0055 | 0.0167 |  0.0062 |  0.0093 |    0.0164 | 2252.1428 |  23.6327
WMF(K=40)  | 2.9871 | 0.6339 | 0.0053 | 0.0154 |  0.0053 |  0.0084 |    0.0156 | 2366.7826 |  23.6294
WMF(K=50)  | 2.8696 | 0.6625 | 0.0069 | 0.0211 |  0.0075 |  0.0115 |    0.0213 | 2524.2343 |  23.8421
WMF(K=60)  | 2.8604 | 0.6542 | 0.0068 | 0.0214 |  0.0078 |  0.0122 |    0.0243 | 2720.2537 |  23.9367
WMF(K=70)  | 2.8760 | 0.6449 | 0.0053 | 0.0184 |  0.0061 |  0.0096 |    0.0197 | 28




## Train on best model

### Training

We will train on the training data and validate on the probe data.

### Save the model

## Extract the recommendations

### TO BE COMPLETED


# Hyperparameter search

### TO BE COMPLETED