Inductive Matrix Completion Based On Graph Neural Networks
- Link: [Paper](https://openreview.net/pdf?id=ByxxgCEYDS)

In [None]:
# !conda install pyg -c pyg -y
# !pip install simpletransformers

In [1]:
from pathlib import Path
import sys
sys.path.append('../../')
import math

import pandas as pd
import numpy as np
import torch.nn.functional as F
from torch.optim import Adam
from torch_geometric.loader import DataLoader

from dataset.movielens import MovieLensDataset
from models.igmc import IGMC
from helper.igmc_utils import *

In [20]:
BASE_PATH = Path('../../raw_data/movie-lens/ml-latest-small')
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
SEED = 42
EPOCHS=10
BATCH_SIZE=512
LR=2e-3
LR_DECAY_STEP = 20
LR_DECAY_VALUE = 10

In [3]:
df_movies = pd.read_csv(BASE_PATH/'movies.csv')
df_links = pd.read_csv(BASE_PATH/'links.csv')
df_tags = pd.read_csv(BASE_PATH/'tags.csv')
df_ratings = pd.read_csv(BASE_PATH/'ratings.csv').drop(labels='timestamp', axis=1)

df_items, genres, genres_mp  = process_movies(df_movies, use_embeddings=True)
(rated_users, rated_users_dict, num_users), (rated_items, rated_items_dict, num_items), ratings = get_nodes(df_ratings)
item_features = get_item_features(df_items, rated_items_dict, sparse=False)
user_features = get_user_features(df_ratings, df_items, genres, genres_mp, rated_users_dict, n=5, sparse=False)
class_values = np.sort(np.unique(ratings))
rating_dict = {r: i for i, r in enumerate(np.sort(np.unique(ratings)).tolist())}

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForTextRepresentation: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForTextRepresentation from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTextRepresentation from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForTextRepres

In [4]:
df_movies.head(2)

Unnamed: 0,movieId,title,genres,(no genres listed),Action,Adventure,Animation,Children,Comedy,Crime,...,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western,feature
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,0,0,1,1,1,1,0,...,0,0,0,0,0,0,0,0,0,"[-0.4027835726737976, 0.14599479734897614, -0...."
1,2,Jumanji (1995),Adventure|Children|Fantasy,0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,"[-0.40278360247612, 0.14599479734897614, -0.37..."


In [5]:
df_links.head(2)

Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862.0
1,2,113497,8844.0


In [6]:
df_ratings.head(2)

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0


In [7]:
df_tags.head(2)

Unnamed: 0,userId,movieId,tag,timestamp
0,2,60756,funny,1445714994
1,2,60756,Highly quotable,1445714996


In [8]:
df_items.head(2)

Unnamed: 0,movieId,title,(no genres listed),Action,Adventure,Animation,Children,Comedy,Crime,Documentary,...,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western,feature
0,1,Toy Story (1995),0,0,1,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,"[-0.4027835726737976, 0.14599479734897614, -0...."
1,2,Jumanji (1995),0,0,1,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,"[-0.40278360247612, 0.14599479734897614, -0.37..."


In [12]:
samples = (rated_users, rated_items, ratings)
user_train_idx, item_train_idx, user_test_idx, item_test_idx, train_labels, test_labels = split(samples, rating_dict)

data = train_labels + 1.
data = data.astype(np.float32)
adj_mat = sp.csr_matrix(
    (data, [user_train_idx, item_train_idx]), 
    shape=[num_users, num_items], 
    dtype=np.float32,
)

train_dataset = MovieLensDataset(
    root='../../raw_data/movie-lens/ml-latest-small',
    adj_mat=adj_mat,
    links=(user_train_idx, item_train_idx),
    labels=train_labels,
    h=1,
    sample_ratio=1,
    max_nodes_per_hop=200,
    u_features=user_features,
    v_features=item_features,
    class_values=class_values,
)

test_dataset = MovieLensDataset(
    root='../../raw_data/movie-lens/ml-latest-small',
    adj_mat=adj_mat,
    links=(user_test_idx, item_test_idx),
    labels=test_labels,
    h=1,
    sample_ratio=1,
    max_nodes_per_hop=200,
    u_features=user_features,
    v_features=item_features,
    class_values=class_values,
)

train_loader = DataLoader(train_dataset, BATCH_SIZE, shuffle=True, num_workers=0)
test_loader = DataLoader(test_dataset, BATCH_SIZE, shuffle=False, num_workers=0)

In [13]:
num_relations = len(class_values)
n_features = user_features.shape[1] + item_features.shape[1]
model = IGMC(
    train_dataset, 
    num_relations=num_relations, 
    num_bases=4,   
    side_features=True, 
    n_side_features=n_features, 
)

In [21]:
model.to(DEVICE)
model.reset_parameters()
optimizer = Adam(model.parameters(), lr=LR, weight_decay=0)

for epoch in range(1, EPOCHS+1):
    model.train()
    train_loss_all = 0
    for idx, train_batch in enumerate(train_loader):
        optimizer.zero_grad()
        train_batch = train_batch.to(DEVICE)
        y_pred = model(train_batch)
        y_true = train_batch.y
        train_loss = F.mse_loss(y_pred, y_true)
        if idx % 20 == 0:
            print(round(train_loss.item(), 2), end=' ')
        train_loss.backward()
        train_loss_all += BATCH_SIZE * float(train_loss)
        optimizer.step()
        torch.cuda.empty_cache()
        
    train_loss_all = train_loss_all / len(train_loader.dataset)
    
    print('epoch', epoch,'; train loss', train_loss_all)

    if epoch % LR_DECAY_STEP == 0:
        for param_group in optimizer.param_groups:
            param_group['lr'] = param_group['lr'] / LR_DECAY_VALUE

13.43 1.22 0.99 1.02 0.9 1.03 1.01 0.91 epoch 1 ; train loss 1.2017364373327442
0.93 0.95 0.97 0.87 1.0 0.96 0.94 1.01 epoch 2 ; train loss 0.9697927233897009
1.02 0.88 0.88 0.88 0.88 0.9 1.02 0.89 epoch 3 ; train loss 0.9594164222365078
1.05 0.86 0.97 0.96 0.96 0.91 0.97 0.96 epoch 4 ; train loss 0.9516343217816534
0.89 1.03 0.93 0.84 0.99 0.84 1.04 0.77 epoch 5 ; train loss 0.9356420084463916
0.88 0.98 1.0 0.9 0.85 0.9 0.81 0.88 epoch 6 ; train loss 0.9253439608886161
0.91 0.86 0.92 0.81 0.86 0.83 0.96 0.85 epoch 7 ; train loss 0.8995138412250869
0.92 0.86 0.89 0.92 0.84 0.75 0.86 0.96 epoch 8 ; train loss 0.8930298194312113
0.83 0.83 0.89 0.91 0.8 0.79 0.82 1.02 epoch 9 ; train loss 0.8806095373921942
0.79 0.82 0.87 0.86 0.83 0.83 0.79 0.85 epoch 10 ; train loss 0.8749942027648239


In [22]:
model.eval()
test_loss = 0
for test_batch in test_loader:
    test_batch = test_batch.to(DEVICE)
    with torch.no_grad():
        y_pred = model(test_batch)
    y_true = test_batch.y
    test_loss += F.mse_loss(y_pred, y_true, reduction='sum')
mse_loss = float(test_loss) / len(test_loader.dataset)

print('test MSE loss', mse_loss)
print('test RMSE loss', math.sqrt(mse_loss))

test MSE loss 0.8010337191032825
test RMSE loss 0.8950048709941653


In [None]:
y_true

In [None]:
n_round(y_pred, 0.5)