<i>Copyright (c) Microsoft Corporation. All rights reserved.</i>

<i>Licensed under the MIT License.</i>

In [None]:
# Install the packages this notebook depends on: `recommenders` (with its
# `examples` extra), `tf_slim` and `fastai`.
# The leading `!` hands each line to the system shell from the notebook.
!pip install recommenders[examples]
!pip install tf_slim
!pip install fastai

In [None]:
# Import Microsoft Recommendation Libraries
import sys
import pandas as pd
import tensorflow as tf
tf.get_logger().setLevel('ERROR') # only show error messages

from recommenders.utils.timer import Timer
from recommenders.models.ncf.ncf_singlenode import NCF
from recommenders.models.ncf.dataset import Dataset as NCFDataset
from recommenders.datasets import movielens
from recommenders.utils.notebook_utils import is_jupyter
from recommenders.datasets.python_splitters import python_chrono_split
from recommenders.evaluation.python_evaluation import (rmse, mae, rsquared, exp_var, map_at_k, ndcg_at_k, precision_at_k, 
                                                     recall_at_k, get_top_k_items)

import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
import itertools

In [None]:
# Load the MovieLens 100k ratings into a pandas DataFrame, naming the columns
# explicitly so later cells can refer to them by these names.
rating_columns = ["userID", "itemID", "rating", "timestamp"]
df = movielens.load_pandas_df(size="100k", header=rating_columns)

In [None]:
# Split the data into 75% training and the rest testing using the Python
# (pandas) chronological splitter -- not the Spark one.
train, test = python_chrono_split(df, ratio=0.75)

In [None]:
# Keep only the test rows whose user AND item both appear in the training set.
user_seen_in_train = test["userID"].isin(train["userID"].unique())
item_seen_in_train = test["itemID"].isin(train["itemID"].unique())
test = test[user_seen_in_train & item_seen_in_train]

# The timestamp column is not used by the NCF model, so drop it from both splits.
train = train.drop(columns="timestamp")
test = test.drop(columns="timestamp")

In [None]:
# Write both splits to CSV (without the index column); the NCF dataset loader
# reads them back from these paths later on.
train_file, test_file = "./train.csv", "./test.csv"
for split_frame, csv_path in ((train, train_file), (test, test_file)):
    split_frame.to_csv(csv_path, index=False)

In [None]:
# Preview the exported MovieLens 100k training split as a dataframe.
#
# The file is read back through `train_file` (the path it was just written to)
# instead of a hard-coded '/content/train.csv', which only resolves when the
# notebook's working directory happens to be /content.
# NOTE: this rebinds `df`, replacing the full dataset loaded earlier with the
# training split only.
df = pd.read_csv(train_file)
df.head()

# The data consists of user and item IDs and the corresponding ratings

In [None]:
# Build the NCF dataset object from the exported train/test CSV files,
# using a fixed seed of 42.
data = NCFDataset(
    train_file=train_file,
    test_file=test_file,
    seed=42,
)

In [None]:
# Settings for the recommendation system

# Number of top items to recommend per user
TOP_K = 10

# Model parameters: number of epochs, batch size and random seed
epochs, batch_size, seed = 50, 256, 42

In [None]:
# Reference NeuMF model built from the notebook-level settings. The
# hyperparameter tuning loop further down constructs NCF models with the same
# fixed arguments while varying n_factors, layer_sizes, n_epochs and
# learning_rate.
#
# The random seed comes from the shared `seed` setting (instead of an
# unrelated hard-coded value) so this model is seeded like the tuning runs.
model = NCF(
    n_users=data.n_users,
    n_items=data.n_items,
    model_type="NeuMF",
    n_factors=8,
    layer_sizes=[32, 16, 8],
    n_epochs=epochs,
    batch_size=batch_size,
    learning_rate=0.01,
    verbose=10,
    seed=seed,
)

In [None]:
# Search space for the hyperparameter tuning of the model.
#
# Wider grid, kept commented out; swap it in for a full search:
# param_grid = {
#     "n_factors": [8, 16],
#     "layer_sizes": [[32,16,8],[16,8]],
#     "n_epochs": [20,50],
#     "learning_rate": [0.0001, 0.001, 0.01]
# }

# Grid currently in use: a single combination.
param_grid = dict(
    n_factors=[8],
    layer_sizes=[[32, 16, 8]],
    n_epochs=[5],
    learning_rate=[0.1],
)


In [None]:
# Expand the grid into every combination of parameter values with
# itertools.product (one tuple per combination, values ordered like the keys
# of param_grid), and start an empty list that will collect the metrics.
param_combinations = [*itertools.product(*param_grid.values())]
results = []


In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error


In [None]:
# Hyperparameter tuning: for each combination in `param_combinations`, fit an
# NCF model, score every (user, item) pair built from the training users and
# items, and append the ranking metrics (computed against `test`) plus an RMSE
# (computed against the training ratings) to the list 'results'.

# Values that do not change between combinations are computed once up front.
param_names = list(param_grid.keys())
all_items = list(train.itemID.unique())
min_rating = train['rating'].min()
max_rating = train['rating'].max()

for params in param_combinations:
    print(params)

    # Bind each value to its hyperparameter name instead of relying on tuple
    # positions, so the mapping stays correct if the grid's keys are reordered.
    hyperparams = dict(zip(param_names, params))

    # NOTE(review): the notebook-level `batch_size` is not passed here, so NCF
    # trains with its own default batch size -- confirm this is intended.
    model = NCF(
        n_users=data.n_users,
        n_items=data.n_items,
        model_type="NeuMF",
        verbose=10,
        seed=seed,
        **hyperparams,
    )

    model.fit(data)

    # Score the full cross product of training users and training items.
    users, items, preds = [], [], []
    for user_id in train.userID.unique():
        user_column = [user_id] * len(all_items)
        users.extend(user_column)
        items.extend(all_items)
        preds.extend(model.predict(user_column, all_items, is_list=True))

    all_predictions = pd.DataFrame(data={"userID": users, "itemID": items, "prediction": preds})

    # `merged` holds predictions for pairs that were rated in training;
    # after the outer merge, rows with a null rating are the pairs NOT seen in
    # training, which are the candidates used for the ranking metrics.
    merged = pd.merge(train, all_predictions, on=["userID", "itemID"], how="inner")
    merged2 = pd.merge(train, all_predictions, on=["userID", "itemID"], how="outer")
    all_predictions = merged2[merged2.rating.isnull()].drop('rating', axis=1)

    # Calculating the evaluation metrics:
    score1 = map_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
    score2 = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
    score3 = precision_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
    score4 = recall_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)

    # Min-max rescale the predictions onto the [min_rating, max_rating] range
    # of the training ratings so they can be compared with actual ratings.
    predicted_ratings = merged['prediction']
    rescaled_predicted_ratings = (predicted_ratings - predicted_ratings.min()) / (predicted_ratings.max() - predicted_ratings.min())
    rescaled_predicted_ratings = (rescaled_predicted_ratings * (max_rating - min_rating)) + min_rating

    # Take the true ratings from the same merged frame as the predictions so
    # both arrays are aligned row by row by construction.
    y_true = merged['rating'].values
    y_pred = rescaled_predicted_ratings.values
    # Stored under a name that does not overwrite the imported `rmse` metric.
    rmse_score = np.sqrt(mean_squared_error(y_true, y_pred))

    # store the results for this set of hyperparameters
    print("params:", params, "MAP@K:", score1, "NDCG@K:", score2, "Precision@K:", score3, "Recall@K:", score4, "RMSE:", rmse_score)
    results.append({
        "params": params,
        "MAP@K": score1,
        "NDCG@K": score2,
        "Precision@K": score3,
        "Recall@K": score4,
        "RMSE": rmse_score,
    })



In [None]:
# Pick the entry of `results` whose RMSE is the smallest and report it.
def _rmse_of(entry):
    return entry['RMSE']


min_entry = min(results, key=_rmse_of)
print('Parametrics that gave the least error: ', min_entry)