In [1]:
# Importing some libraries
import graphlab as gl
import numpy as np
import pandas as pd
import pickle
import os
from  sklearn.preprocessing import MinMaxScaler
import warnings
## Ignoring Warnings
warnings.filterwarnings('ignore')

## 1) Data import

In [2]:
# Import Files
data = pd.read_csv('./outputdata/ratings.csv')
data.columns = ['user_id','item_id','rating','time']
users = pd.read_csv('./outputdata/users.csv')
movies = pickle.load(open('./outputdata/movies.p'))
movies.columns = ['item_id','title','genre','plot']
movies_omdb = pd.read_csv('./outputdata/movie_omdb.csv')
movies_expanded = pd.read_csv('./outputdata/movies_expanded.csv').ix[:,1:]
x = list(movies_expanded.columns)
x[0] = 'item_id'
movies_expanded.columns = x
movies_expanded = movies_expanded.drop(['movie_title','genre'],axis=1)
movies_name = gl.SFrame('./outputdata/movies.csv')
movies_name = gl.SFrame({'item_id': movies_name['movie_id'],'title' : movies_name['movie_title'],'genre': movies_name['genre']})

## 2) Data Manipulation

In [3]:
### Defining all the fucntions for Manipulations

# Function to Removing "tt" from the movie_id
def subtring(x):
    return (int(x[2:]))

## Function for Extracting numerical run time from text
def runtime(x):
    if pd.isnull(x):
        return np.NaN
    elif len(x.split()) == 2:
        return int(x.split()[0])
    elif len(x.split()) == 4:
        return int(x.split()[0]) * 60 + int(x.split()[2])
    
## Function if a movie won a award or not
def award(x):
    if pd.isnull(x):
        return 0
    else:
        return 1

## Function if the item is a Movie or not, assuming Nulls are also movies
def is_movie(x):
    if pd.isnull(x):
        return 1
    elif x == "movie":
        return 1
    else:
        return 0

In [4]:
## Fixing movie_id in omdb table - Removing tt from string and converting into a integer
movies_omdb['item_id'] = movies_omdb.imdbID.apply(subtring)

#Implementing the above function for data transformation
movies_omdb.Runtime = movies_omdb.Runtime.apply(runtime)

# Taking only selective columns from OMDB movie data
movies_omdb1 = movies_omdb.ix[:,[1,12,15,16,18,20]] 

# Applying remaining transformation
movies_omdb1.Awards = movies_omdb1.Awards.apply(award)
movies_omdb1.Type = movies_omdb1.Type.apply(is_movie)
movies_omdb1.imdbRating[pd.isnull(movies_omdb1.imdbRating)] =  np.mean(movies_omdb1.imdbRating )
movies_omdb1.imdbVotes[pd.isnull(movies_omdb1.imdbVotes)] =  np.mean(movies_omdb1.imdbVotes )

# Merging OMDB features with Genres
movies_full = pd.merge(movies_expanded,movies_omdb1,how="inner")

In [5]:
## Converting data in SFrame
ratings = gl.SFrame(data.ix[:,[0,1,2]])
genre = gl.SFrame(movies_expanded)
omdb = gl.SFrame(movies_omdb1)
movie_features_full = gl.SFrame(movies_full)

[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: C:\Users\AAYUSH~1\AppData\Local\Temp\graphlab_server_1480798848.log.0


This non-commercial license of GraphLab Create for academic use is assigned to agraw134@umn.edu and will expire on November 11, 2017.


## 3) Modelling

In [6]:
# Validation Code for different algorithms together

# Creating Folds for Cross Validation
folds = gl.cross_validation.KFold(ratings, 5)

# List creating for saving Cross Validation Results
result_mf_rating = list()
result_rmf_rating = list()
result_mf_rating_genre = list()
result_mf_rating_omdb = list()
result_mf_rating_full = list()

# Running Models with Cross Validation
for train, valid in folds:
    # Model 1 : Factorization Machines with Rating Database
    mf_rating = gl.factorization_recommender.create(train, target='rating')
    
    # Model 2: Rank based Factorization Machines with Rating Database
    rmf_rating = gl.recommender.create(train, target='rating')
    
    # Model 3:  Factorization Machines with Rating Database and Movie Genre as side features
    mf_rating_genre = gl.factorization_recommender.create(train, target='rating',item_data = genre,side_data_factorization=True)
    
    # Model 4:  Factorization Machines with Rating Database and Movie OMDB as side features
    mf_rating_omdb = gl.factorization_recommender.create(train, target='rating',item_data = omdb,side_data_factorization=True)
    
    # Model 5:  Factorization Machines with Rating Database, Genre and  Movie OMDB as side features
    mf_rating_full = gl.factorization_recommender.create(train, target='rating',item_data = movie_features_full,side_data_factorization=True)
    
    # Evaluating Model 1 on Test set
    x = mf_rating.evaluate(valid)
    result_mf_rating.append(x['rmse_overall'])
    
    # Evaluating Model 2 on Test set
    x = rmf_rating.evaluate(valid)
    result_rmf_rating.append(x['rmse_overall'])
    
    # Evaluating Model 3 on Test set
    x = mf_rating_genre.evaluate(valid)
    result_mf_rating_genre.append(x['rmse_overall'])        
    
    # Evaluating Model 4 on Test set
    x = mf_rating_omdb.evaluate(valid)
    result_mf_rating_omdb.append(x['rmse_overall'])
    
    # Evaluating Model 5 on Test set
    x = mf_rating_full.evaluate(valid)
    result_mf_rating_full.append(x['rmse_overall'])


Precision and recall summary statistics by cutoff
+--------+-------------------+-------------------+
| cutoff |   mean_precision  |    mean_recall    |
+--------+-------------------+-------------------+
|   1    |        0.0        |        0.0        |
|   2    | 0.000166574125486 | 1.20746798094e-06 |
|   3    | 0.000111049416991 | 1.20746798094e-06 |
|   4    | 8.32870627429e-05 | 1.20746798094e-06 |
|   5    | 8.88395335924e-05 | 1.13028695255e-05 |
|   6    | 9.25411808255e-05 | 1.17714324664e-05 |
|   7    | 7.93210121361e-05 | 1.17714324664e-05 |
|   8    | 9.71682398667e-05 | 1.68517544342e-05 |
|   9    | 8.63717687704e-05 | 1.68517544342e-05 |
|   10   | 9.99444752915e-05 | 7.26232394117e-05 |
+--------+-------------------+-------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 1.7109780751888786)

Per User RMSE (best)
+---------+-------+-------------------+
| user_id | count |        rmse       |
+---------+-------+-------------------+
|   6680  |   1   | 0.0005949


Precision and recall summary statistics by cutoff
+--------+-----------------+------------------+
| cutoff |  mean_precision |   mean_recall    |
+--------+-----------------+------------------+
|   1    | 0.0168795113826 | 0.00304287120806 |
|   2    | 0.0167684619656 | 0.00547489118675 |
|   3    | 0.0235794928743 | 0.00981539588325 |
|   4    | 0.0270682953914 |  0.013154426297  |
|   5    | 0.0306496390894 | 0.0175915960111  |
|   6    | 0.0305941143809 | 0.0226055340234  |
|   7    | 0.0307289601015 |  0.026133872466  |
|   8    | 0.0310938367574 | 0.0305139275245  |
|   9    | 0.0341291874884 | 0.0382261690784  |
|   10   | 0.0343253747918 | 0.0414548256666  |
+--------+-----------------+------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 1.8829676981542138)

Per User RMSE (best)
+---------+-------+-------------------+
| user_id | count |        rmse       |
+---------+-------+-------------------+
|   2235  |   1   | 0.000201570747734 |
+---------+-------+------------


Precision and recall summary statistics by cutoff
+--------+-----------------+------------------+
| cutoff |  mean_precision |   mean_recall    |
+--------+-----------------+------------------+
|   1    | 0.0372015546918 | 0.00434050469649 |
|   2    | 0.0373681288173 | 0.00767953511029 |
|   3    | 0.0369424393855 | 0.0109081916985  |
|   4    | 0.0302054414214 |  0.011818359167  |
|   5    | 0.0254525263742 | 0.0126949586112  |
|   6    | 0.0251341847122 | 0.0143700907205  |
|   7    |  0.023431426985 | 0.0156988167321  |
|   8    | 0.0210299833426 | 0.0158178848581  |
|   9    | 0.0191868714911 | 0.0159807185253  |
|   10   |  0.017268184342 | 0.0159807185253  |
+--------+-----------------+------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 1.8134799174382465)

Per User RMSE (best)
+---------+-------+-------------------+
| user_id | count |        rmse       |
+---------+-------+-------------------+
|   875   |   1   | 0.000734288072099 |
+---------+-------+------------


Precision and recall summary statistics by cutoff
+--------+-------------------+-------------------+
| cutoff |   mean_precision  |    mean_recall    |
+--------+-------------------+-------------------+
|   1    |        0.0        |        0.0        |
|   2    |        0.0        |        0.0        |
|   3    | 3.70164723302e-05 | 2.46776482201e-07 |
|   4    | 2.77623542476e-05 | 2.46776482201e-07 |
|   5    | 4.44197667962e-05 | 4.93552964402e-07 |
|   6    | 3.70164723302e-05 | 4.93552964402e-07 |
|   7    | 3.17284048544e-05 | 4.93552964402e-07 |
|   8    | 4.16435313715e-05 | 9.66103675001e-07 |
|   9    | 7.40329446604e-05 | 4.89044080294e-06 |
|   10   | 6.66296501943e-05 | 4.89044080294e-06 |
+--------+-------------------+-------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 1.66178480024915)

Per User RMSE (best)
+---------+-------+-------------------+
| user_id | count |        rmse       |
+---------+-------+-------------------+
|   1282  |   1   | 0.000360973


Precision and recall summary statistics by cutoff
+--------+-------------------+-------------------+
| cutoff |   mean_precision  |    mean_recall    |
+--------+-------------------+-------------------+
|   1    | 0.000111049416991 | 3.26615932325e-06 |
|   2    | 5.55247084953e-05 | 3.26615932325e-06 |
|   3    | 3.70164723302e-05 | 3.26615932325e-06 |
|   4    | 2.77623542476e-05 | 3.26615932325e-06 |
|   5    | 2.22098833981e-05 | 3.26615932325e-06 |
|   6    | 1.85082361651e-05 | 3.26615932325e-06 |
|   7    | 3.17284048544e-05 | 3.73472226414e-06 |
|   8    | 2.77623542476e-05 | 3.73472226414e-06 |
|   9    | 3.70164723302e-05 | 5.95571060395e-06 |
|   10   | 5.55247084953e-05 | 9.85991929191e-06 |
+--------+-------------------+-------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 1.6612473624617028)

Per User RMSE (best)
+---------+-------+-------------------+
| user_id | count |        rmse       |
+---------+-------+-------------------+
|   7379  |   1   | 0.0003053


Precision and recall summary statistics by cutoff
+--------+-------------------+-------------------+
| cutoff |   mean_precision  |    mean_recall    |
+--------+-------------------+-------------------+
|   1    | 0.000111931945377 |  4.3723416163e-07 |
|   2    | 0.000111931945377 | 3.10228048013e-06 |
|   3    | 7.46212969181e-05 | 3.10228048013e-06 |
|   4    | 5.59659726886e-05 | 3.10228048013e-06 |
|   5    | 4.47727781509e-05 | 3.10228048013e-06 |
|   6    | 7.46212969181e-05 | 0.000116756255786 |
|   7    | 9.59416674662e-05 | 0.000119346027359 |
|   8    | 9.79404522051e-05 | 0.000121041965925 |
|   9    | 0.000124368828197 | 0.000141721467803 |
|   10   | 0.000111931945377 | 0.000141721467803 |
+--------+-------------------+-------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 1.7092802609611355)

Per User RMSE (best)
+---------+-------+-------------------+
| user_id | count |        rmse       |
+---------+-------+-------------------+
|  11709  |   1   | 0.0004204


Precision and recall summary statistics by cutoff
+--------+-----------------+------------------+
| cutoff |  mean_precision |   mean_recall    |
+--------+-----------------+------------------+
|   1    | 0.0172375195881 | 0.00309104334597 |
|   2    | 0.0161182001343 | 0.00615663480973 |
|   3    | 0.0217521080516 | 0.00926437346599 |
|   4    | 0.0248209088874 | 0.0132508444133  |
|   5    |  0.028497873293 | 0.0176180431301  |
|   6    | 0.0288784419073 | 0.0218423092579  |
|   7    | 0.0326521474943 | 0.0323878562315  |
|   8    | 0.0354964181777 | 0.0391200404804  |
|   9    | 0.0350595726687 | 0.0415998377082  |
|   10   | 0.0339601522274 | 0.0431882115701  |
+--------+-----------------+------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 1.88571743157764)

Per User RMSE (best)
+---------+-------+------------------+
| user_id | count |       rmse       |
+---------+-------+------------------+
|  13402  |   1   | 0.00182167643279 |
+---------+-------+------------------


Precision and recall summary statistics by cutoff
+--------+-----------------+------------------+
| cutoff |  mean_precision |   mean_recall    |
+--------+-----------------+------------------+
|   1    | 0.0372733378106 | 0.00288675278088 |
|   2    | 0.0303335571972 | 0.00431559947936 |
|   3    | 0.0227968062085 | 0.00515208639057 |
|   4    | 0.0199798522498 | 0.00594025939558 |
|   5    |  0.020774569062 | 0.00964268072344 |
|   6    |  0.017965077233 | 0.00986226300351 |
|   7    | 0.0164060251367 | 0.0103791532768  |
|   8    | 0.0143692634878 | 0.0104351192495  |
|   9    | 0.0127726786558 | 0.0104351192495  |
|   10   | 0.0114954107902 | 0.0104351192495  |
+--------+-----------------+------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 1.8214859368533527)

Per User RMSE (best)
+---------+-------+-------------------+
| user_id | count |        rmse       |
+---------+-------+-------------------+
|  13642  |   1   | 0.000658759812137 |
+---------+-------+------------


Precision and recall summary statistics by cutoff
+--------+-------------------+-------------------+
| cutoff |   mean_precision  |    mean_recall    |
+--------+-------------------+-------------------+
|   1    |        0.0        |        0.0        |
|   2    |        0.0        |        0.0        |
|   3    |        0.0        |        0.0        |
|   4    | 2.79829863443e-05 | 3.87307769471e-07 |
|   5    | 2.23863890754e-05 | 3.87307769471e-07 |
|   6    | 1.86553242295e-05 | 3.87307769471e-07 |
|   7    |  1.5990277911e-05 | 3.87307769471e-07 |
|   8    | 1.39914931722e-05 | 3.87307769471e-07 |
|   9    | 2.48737656394e-05 | 4.24703002386e-06 |
|   10   | 2.23863890754e-05 | 4.24703002386e-06 |
+--------+-------------------+-------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 1.6734013580532345)

Per User RMSE (best)
+---------+-------+-------------------+
| user_id | count |        rmse       |
+---------+-------+-------------------+
|  12648  |   1   | 0.0001870


Precision and recall summary statistics by cutoff
+--------+-------------------+-------------------+
| cutoff |   mean_precision  |    mean_recall    |
+--------+-------------------+-------------------+
|   1    | 0.000111931945377 | 2.21209378216e-07 |
|   2    | 5.59659726886e-05 | 2.21209378216e-07 |
|   3    | 7.46212969181e-05 | 1.14144039159e-05 |
|   4    | 5.59659726886e-05 | 1.14144039159e-05 |
|   5    | 4.47727781509e-05 | 1.14144039159e-05 |
|   6    | 3.73106484591e-05 | 1.14144039159e-05 |
|   7    | 3.19805558221e-05 | 1.14144039159e-05 |
|   8    | 2.79829863443e-05 | 1.14144039159e-05 |
|   9    | 2.48737656394e-05 | 1.14144039159e-05 |
|   10   | 3.35795836132e-05 | 0.000123346349293 |
+--------+-------------------+-------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 1.6618901583089931)

Per User RMSE (best)
+---------+-------+-------------------+
| user_id | count |        rmse       |
+---------+-------+-------------------+
|  16945  |   1   | 8.9233503


Precision and recall summary statistics by cutoff
+--------+-------------------+-------------------+
| cutoff |   mean_precision  |    mean_recall    |
+--------+-------------------+-------------------+
|   1    |        0.0        |        0.0        |
|   2    | 5.60600964234e-05 | 3.86621354644e-07 |
|   3    | 3.73733976156e-05 | 3.86621354644e-07 |
|   4    | 2.80300482117e-05 | 3.86621354644e-07 |
|   5    | 4.48480771387e-05 |  1.0165100785e-06 |
|   6    | 3.73733976156e-05 |  1.0165100785e-06 |
|   7    | 3.20343408134e-05 |  1.0165100785e-06 |
|   8    | 5.60600964234e-05 |  1.2585600041e-05 |
|   9    | 7.47467952312e-05 |  1.7695937658e-05 |
|   10   | 7.84841349927e-05 | 2.04989424792e-05 |
+--------+-------------------+-------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 1.7058085218436736)

Per User RMSE (best)
+---------+-------+-------------------+
| user_id | count |        rmse       |
+---------+-------+-------------------+
|  20737  |   1   | 0.0005205


Precision and recall summary statistics by cutoff
+--------+-----------------+------------------+
| cutoff |  mean_precision |   mean_recall    |
+--------+-----------------+------------------+
|   1    |  0.016818028927 | 0.00235695308266 |
|   2    | 0.0155286467093 | 0.00470967402421 |
|   3    | 0.0203311283029 | 0.00941815285689 |
|   4    | 0.0233210001121 | 0.0132185604875  |
|   5    | 0.0290167059087 | 0.0236052567184  |
|   6    | 0.0312441604066 | 0.0272441082926  |
|   7    | 0.0320343408134 | 0.0322568902202  |
|   8    | 0.0351216504092 | 0.0391944559403  |
|   9    | 0.0363892314784 | 0.0445652414609  |
|   10   | 0.0350824083417 | 0.0470128727653  |
+--------+-----------------+------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 1.8879569744382643)

Per User RMSE (best)
+---------+-------+------------------+
| user_id | count |       rmse       |
+---------+-------+------------------+
|  22015  |   1   | 9.1820262722e-05 |
+---------+-------+----------------


Precision and recall summary statistics by cutoff
+--------+-----------------+------------------+
| cutoff |  mean_precision |   mean_recall    |
+--------+-----------------+------------------+
|   1    | 0.0424935530889 | 0.00363988972416 |
|   2    |  0.033636057854 | 0.00581184592562 |
|   3    | 0.0293007437306 | 0.00883764875707 |
|   4    | 0.0253952236798 | 0.0099680808159  |
|   5    | 0.0216167731808 | 0.0105489583441  |
|   6    | 0.0189296258923 | 0.0109996496752  |
|   7    | 0.0166418400525 | 0.0111832308259  |
|   8    | 0.0153324363718 |  0.011775290227  |
|   9    | 0.0137409525233 | 0.0117953602116  |
|   10   | 0.0125574615988 | 0.0119587659432  |
+--------+-----------------+------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 1.8447471620359903)

Per User RMSE (best)
+---------+-------+----------------+
| user_id | count |      rmse      |
+---------+-------+----------------+
|  22180  |   1   | 0.122281502593 |
+---------+-------+----------------+
[1 row


Precision and recall summary statistics by cutoff
+--------+-------------------+-------------------+
| cutoff |   mean_precision  |    mean_recall    |
+--------+-------------------+-------------------+
|   1    | 0.000112120192847 | 7.18719184915e-07 |
|   2    | 0.000112120192847 | 1.31510318942e-06 |
|   3    | 7.47467952312e-05 | 1.31510318942e-06 |
|   4    | 5.60600964234e-05 | 1.31510318942e-06 |
|   5    | 4.48480771387e-05 | 1.31510318942e-06 |
|   6    | 7.47467952312e-05 | 0.000117587895771 |
|   7    | 8.00858520334e-05 | 0.000117899340751 |
|   8    | 9.81051687409e-05 | 0.000118604009521 |
|   9    | 8.72045944363e-05 | 0.000118604009521 |
|   10   | 7.84841349927e-05 | 0.000118604009521 |
+--------+-------------------+-------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 1.6661563926248433)

Per User RMSE (best)
+---------+-------+-------------------+
| user_id | count |        rmse       |
+---------+-------+-------------------+
|  22100  |   1   | 0.0005612


Precision and recall summary statistics by cutoff
+--------+-------------------+-------------------+
| cutoff |   mean_precision  |    mean_recall    |
+--------+-------------------+-------------------+
|   1    | 0.000112120192847 | 3.61678041441e-06 |
|   2    | 0.000112120192847 | 4.33549959933e-06 |
|   3    | 7.47467952312e-05 | 4.33549959933e-06 |
|   4    |  8.4090144635e-05 | 4.93188360383e-06 |
|   5    | 8.96961542774e-05 | 5.57625252824e-06 |
|   6    | 7.47467952312e-05 | 5.57625252824e-06 |
|   7    | 6.40686816267e-05 | 5.57625252824e-06 |
|   8    | 5.60600964234e-05 | 5.57625252824e-06 |
|   9    |  6.2288996026e-05 | 6.22062145264e-06 |
|   10   | 5.60600964234e-05 | 6.22062145264e-06 |
+--------+-------------------+-------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 1.6556843060633093)

Per User RMSE (best)
+---------+-------+-------------------+
| user_id | count |        rmse       |
+---------+-------+-------------------+
|  18744  |   1   | 6.0570226


Precision and recall summary statistics by cutoff
+--------+-------------------+-------------------+
| cutoff |   mean_precision  |    mean_recall    |
+--------+-------------------+-------------------+
|   1    | 0.000337040781935 | 9.01391367715e-06 |
|   2    | 0.000168520390967 | 9.01391367715e-06 |
|   3    | 0.000112346927312 | 9.01391367715e-06 |
|   4    | 0.000112346927312 | 1.11744315101e-05 |
|   5    | 8.98775418492e-05 | 1.11744315101e-05 |
|   6    |  7.4897951541e-05 | 1.11744315101e-05 |
|   7    |  6.4198244178e-05 | 1.11744315101e-05 |
|   8    | 9.83035613976e-05 | 2.03028531764e-05 |
|   9    | 0.000112346927312 | 2.70668929861e-05 |
|   10   |  0.00010111223458 | 2.70668929861e-05 |
+--------+-------------------+-------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 1.66175797794858)

Per User RMSE (best)
+---------+-------+-------------------+
| user_id | count |        rmse       |
+---------+-------+-------------------+
|  29912  |   1   | 0.000638347


Precision and recall summary statistics by cutoff
+--------+-----------------+------------------+
| cutoff |  mean_precision |   mean_recall    |
+--------+-----------------+------------------+
|   1    | 0.0159532636782 | 0.00276662954756 |
|   2    | 0.0255589259634 | 0.00633787798264 |
|   3    | 0.0220199977531 | 0.00930239074044 |
|   4    | 0.0257274463543 | 0.0141313485385  |
|   5    |  0.028625997079 | 0.0182539038062  |
|   6    | 0.0313822416957 | 0.0229816137661  |
|   7    | 0.0355016290304 |  0.030679703601  |
|   8    | 0.0348135041007 | 0.0350339147161  |
|   9    | 0.0366001323197 | 0.0456231235422  |
|   10   | 0.0366138636108 | 0.0500875037696  |
+--------+-----------------+------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 1.8427377420318056)

Per User RMSE (best)
+---------+-------+-------------------+
| user_id | count |        rmse       |
+---------+-------+-------------------+
|  27179  |   1   | 0.000124353493429 |
+---------+-------+------------


Precision and recall summary statistics by cutoff
+--------+-----------------+------------------+
| cutoff |  mean_precision |   mean_recall    |
+--------+-----------------+------------------+
|   1    | 0.0402201999775 | 0.00412255526768 |
|   2    | 0.0320750477474 | 0.0055588562132  |
|   3    | 0.0296221398345 | 0.00969420409877 |
|   4    | 0.0239298955174 | 0.0104727387483  |
|   5    | 0.0218177732839 | 0.0116672193949  |
|   6    | 0.0187806613489 | 0.0120165118821  |
|   7    | 0.0166594443642 | 0.0122354949379  |
|   8    |  0.015012358162 | 0.0124983880796  |
|   9    |  0.013544046237 | 0.0126341486614  |
|   10   | 0.0126053252444 | 0.0127896556312  |
+--------+-----------------+------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 1.823168288747858)

Per User RMSE (best)
+---------+-------+----------------+
| user_id | count |      rmse      |
+---------+-------+----------------+
|  32022  |   1   | 0.242465884323 |
+---------+-------+----------------+
[1 rows


Precision and recall summary statistics by cutoff
+--------+-------------------+-------------------+
| cutoff |   mean_precision  |    mean_recall    |
+--------+-------------------+-------------------+
|   1    | 0.000112346927312 | 2.16051783291e-06 |
|   2    | 5.61734636558e-05 | 2.16051783291e-06 |
|   3    | 3.74489757705e-05 | 2.16051783291e-06 |
|   4    | 2.80867318279e-05 | 2.16051783291e-06 |
|   5    | 2.24693854623e-05 | 2.16051783291e-06 |
|   6    | 1.87244878853e-05 | 2.16051783291e-06 |
|   7    | 1.60495610445e-05 | 2.16051783291e-06 |
|   8    | 2.80867318279e-05 | 4.90068679173e-06 |
|   9    |  2.4965983847e-05 | 4.90068679173e-06 |
|   10   | 2.24693854623e-05 | 4.90068679173e-06 |
+--------+-------------------+-------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 1.6292197623667362)

Per User RMSE (best)
+---------+-------+-------------------+
| user_id | count |        rmse       |
+---------+-------+-------------------+
|  29183  |   1   | 4.2272716


Precision and recall summary statistics by cutoff
+--------+-------------------+-------------------+
| cutoff |   mean_precision  |    mean_recall    |
+--------+-------------------+-------------------+
|   1    |        0.0        |        0.0        |
|   2    |        0.0        |        0.0        |
|   3    |        0.0        |        0.0        |
|   4    | 8.42601954837e-05 | 9.12842166634e-06 |
|   5    | 6.74081563869e-05 | 9.12842166634e-06 |
|   6    | 5.61734636558e-05 | 9.12842166634e-06 |
|   7    | 4.81486831335e-05 | 9.12842166634e-06 |
|   8    | 4.21300977418e-05 | 9.12842166634e-06 |
|   9    | 3.74489757705e-05 | 9.12842166634e-06 |
|   10   | 3.37040781935e-05 | 9.12842166634e-06 |
+--------+-------------------+-------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 1.628409527114778)

Per User RMSE (best)
+---------+-------+-----------------+
| user_id | count |       rmse      |
+---------+-------+-----------------+
|  27525  |   1   | 0.0010354481455 


Precision and recall summary statistics by cutoff
+--------+-------------------+-------------------+
| cutoff |   mean_precision  |    mean_recall    |
+--------+-------------------+-------------------+
|   1    |        0.0        |        0.0        |
|   2    | 0.000156510851419 | 0.000130765581072 |
|   3    | 0.000173900946021 |  0.0001341431212  |
|   4    | 0.000130425709516 |  0.0001341431212  |
|   5    | 0.000104340567613 |  0.0001341431212  |
|   6    | 0.000121730662215 | 0.000138240888789 |
|   7    | 0.000178869544479 | 0.000158407578932 |
|   8    | 0.000169553422371 | 0.000159877164392 |
|   9    |  0.00016230754962 | 0.000171470560793 |
|   10   | 0.000156510851419 | 0.000275811128406 |
+--------+-------------------+-------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 1.6842899949153836)

Per User RMSE (best)
+---------+-------+-------------------+
| user_id | count |        rmse       |
+---------+-------+-------------------+
|  35947  |   1   | 0.0009731


Precision and recall summary statistics by cutoff
+--------+-----------------+------------------+
| cutoff |  mean_precision |   mean_recall    |
+--------+-----------------+------------------+
|   1    | 0.0171118530885 | 0.00264128868978 |
|   2    | 0.0138772954925 | 0.0038923496042  |
|   3    | 0.0189204229271 | 0.00920081782274 |
|   4    | 0.0177378964942 | 0.0119239373748  |
|   5    | 0.0212020033389 | 0.0168512077775  |
|   6    |  0.025963411241 | 0.0259801015322  |
|   7    | 0.0303631051753 | 0.0326334247635  |
|   8    | 0.0310021911519 | 0.0357775625486  |
|   9    | 0.0314065108514 | 0.0400589920878  |
|   10   | 0.0325959933222 | 0.0445253970718  |
+--------+-----------------+------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 1.8806327028721126)

Per User RMSE (best)
+---------+-------+------------------+
| user_id | count |       rmse       |
+---------+-------+------------------+
|  45217  |   1   | 0.00215588360157 |
+---------+-------+----------------


Precision and recall summary statistics by cutoff
+--------+------------------+------------------+
| cutoff |  mean_precision  |   mean_recall    |
+--------+------------------+------------------+
|   1    |       0.0        |       0.0        |
|   2    | 0.0113731218698  | 0.00196081577881 |
|   3    | 0.00758208124652 | 0.00196081577881 |
|   4    | 0.00568656093489 | 0.00196081577881 |
|   5    | 0.00459098497496 | 0.00196345224442 |
|   6    | 0.00382582081247 | 0.00196345224442 |
|   7    | 0.00327927498211 | 0.00196345224442 |
|   8    | 0.00286936560935 | 0.00196345224442 |
|   9    | 0.00255054720831 | 0.00196345224442 |
|   10   | 0.00229549248748 | 0.00196345224442 |
+--------+------------------+------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 1.8088810661157144)

Per User RMSE (best)
+---------+-------+------------------+
| user_id | count |       rmse       |
+---------+-------+------------------+
|  41505  |   1   | 0.00140546995139 |
+---------+-------+--


Precision and recall summary statistics by cutoff
+--------+-------------------+-------------------+
| cutoff |   mean_precision  |    mean_recall    |
+--------+-------------------+-------------------+
|   1    | 0.000104340567613 | 7.14661422005e-07 |
|   2    | 0.000104340567613 | 1.94219751157e-06 |
|   3    | 6.95603784085e-05 | 1.94219751157e-06 |
|   4    | 5.21702838063e-05 | 1.94219751157e-06 |
|   5    | 6.26043405676e-05 | 2.15213829147e-06 |
|   6    | 6.95603784085e-05 | 0.000106492705904 |
|   7    |  5.9623181493e-05 | 0.000106492705904 |
|   8    | 6.52128547579e-05 | 0.000210833273517 |
|   9    | 6.95603784085e-05 | 0.000211409740741 |
|   10   | 7.30383973289e-05 | 0.000315750308354 |
+--------+-------------------+-------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 1.6530751475047603)

Per User RMSE (best)
+---------+-------+------------------+
| user_id | count |       rmse       |
+---------+-------+------------------+
|  36435  |   1   | 0.0017025743


Precision and recall summary statistics by cutoff
+--------+-------------------+-------------------+
| cutoff |   mean_precision  |    mean_recall    |
+--------+-------------------+-------------------+
|   1    |        0.0        |        0.0        |
|   2    |        0.0        |        0.0        |
|   3    | 3.47801892042e-05 | 7.14661422005e-07 |
|   4    | 2.60851419032e-05 | 7.14661422005e-07 |
|   5    | 4.17362270451e-05 | 0.000105055229035 |
|   6    | 5.21702838063e-05 | 0.000107801033446 |
|   7    |  5.9623181493e-05 | 0.000109028569535 |
|   8    | 5.21702838063e-05 | 0.000109028569535 |
|   9    | 4.63735856056e-05 | 0.000109028569535 |
|   10   | 4.17362270451e-05 | 0.000109028569535 |
+--------+-------------------+-------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 1.6416101507963237)

Per User RMSE (best)
+---------+-------+-------------------+
| user_id | count |        rmse       |
+---------+-------+-------------------+
|  37739  |   1   | 0.0004038

In [10]:
## Printing the result of 5 fold Cross - Validation result
print "Model 1 : Factorization Machines with Rating Database :{0:.4f}".format(np.mean(result_mf_rating))
print "Model 2 : Rank based Factorization Machines with Rating Database :{0:.4f}".format(np.mean(result_rmf_rating))
print "Model 3 : Factorization Machines with Rating Database and Movie Genre as side features :{0:.4f}".format(np.mean(result_mf_rating_genre))
print "Model 4 : Factorization Machines with Rating Database and Movie OMDB as side features :{0:.4f}".format(np.mean(result_mf_rating_omdb))
print "Model 5 : Factorization Machines with Rating Database, Genre and  Movie OMDB as side features :{0:.4f}".format(np.mean(result_mf_rating_full))

Model 1 : Factorization Machines with Rating Database :1.6944
Model 2 : Rank based Factorization Machines with Rating Database :1.8760
Model 3 : Factorization Machines with Rating Database and Movie Genre as side features :1.8224
Model 4 : Factorization Machines with Rating Database and Movie OMDB as side features :1.6567
Model 5 : Factorization Machines with Rating Database, Genre and  Movie OMDB as side features :1.6498


## 4) Visualization

In [20]:
# As seen above Movie and Genre Features combined give better performance let's build one visualization on it
mf_rating_full = gl.ranking_factorization_recommender.create(ratings, target='rating',item_data = movie_features_full,side_data_factorization=True,random_seed=30)
view = mf_rating_full.views.explore(item_data=movies_name,item_name_column='title')
view.show()

View object

URI: 		http://localhost:32212/view/2baa26f3-230b-4b5c-8612-b9f70569d53e
HTML: 		
<gl-recommender-explore
    uri="http://localhost:32212/view/6a0c5dd3-459a-4547-ba4f-8cabdfaa188d"
    api_key=""
/>
        