# Load Libraries

In [3]:
import graphlab as gl
from os import path

gl.canvas.set_target('ipynb')
#gl.canvas.set_target('browser')

# Load Data

In [4]:
data_dir = "../Resources/Data/Raw/ml-latest/"

In [5]:
links_sf = gl.SFrame.read_csv("../Resources/Data/Derived/links_redux.csv", verbose=False)

[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: /tmp/graphlab_server_1487126349.log
INFO:graphlab.cython.cy_server:GraphLab Create v2.1 started. Logging: /tmp/graphlab_server_1487126349.log


This non-commercial license of GraphLab Create for academic use is assigned to zr28@cornell.edu and will expire on February 15, 2018.


In [6]:
# Table of movies we are recommending: movieId, title, genres
items = gl.SFrame.read_csv(path.join(data_dir, 'movies.csv'), verbose=False)

In [7]:
# Get the metadata ready
urls = gl.SFrame.read_csv(path.join(data_dir, 'links.csv'), verbose=False)
items = items.join(urls, on='movieId')
items = items.join(links_sf, on='movieId')

In [8]:
items.head(3)

movieId,title,genres,imdbId,tmdbId,X1,imdbPictureURL
1,Toy Story (1995),Adventure|Animation|Child ren|Comedy|Fantasy ...,114709,862,0,http://ia.media-imdb.com/ images/M/MV5BMTMwNDU0 ...
2,Jumanji (1995),Adventure|Children|Fantas y ...,113497,8844,1,http://ia.media-imdb.com/ images/M/MV5BMzM5NjE1 ...
3,Grumpier Old Men (1995),Comedy|Romance,113228,15602,2,http://ia.media-imdb.com/ images/M/MV5BMTI5MTgy ...


In [9]:
# Table of interactions between users and items: userId, movieId, rating, timestamp
actions = gl.SFrame.read_csv(path.join(data_dir, 'ratings.csv'), verbose=False)

In [10]:
actions.head(5)

userId,movieId,rating,timestamp
1,122,2.0,945544824
1,172,1.0,945544871
1,1221,5.0,945544788
1,1441,4.0,945544871
1,1609,3.0,945544824


In [11]:
actions["rating"].show()

# Split Data

In [14]:
training_data, validation_data = gl.recommender.util.random_split_by_user(actions, 'userId', 'movieId',random_seed=0)

# Create Recomender Model

In [21]:
model_with_rating_target = gl.recommender.create(training_data, 'userId', 'movieId',target="rating")

# Save / Load Model

In [22]:
#model_with_rating_target.save("../Resources/Data/Derived/model_with_rating_target")

In [33]:
# Two files need to be unzipped to equal the contents of the "model_with_rating_target"
# Once they are unzipped you should be able to reload it with the gl.load_model function
!unzip ../Resources/Data/Derived/model_with_rating_target.zip -d "../Resources/Data/Derived/model_with_rating_target"
!unzip ../Resources/Data/Derived/m_4f5f00486f9c6a42.0000.zip -d "../Resources/Data/Derived/model_with_rating_target"

Archive:  ../Resources/Data/Derived/model_with_rating_target.zip
  inflating: ../Resources/Data/Derived/model_with_rating_target/dir_archive.ini  
  inflating: ../Resources/Data/Derived/model_with_rating_target/m_4d89d24e8679e7ec.0000  
  inflating: ../Resources/Data/Derived/model_with_rating_target/m_4d89d24e8679e7ec.frame_idx  
  inflating: ../Resources/Data/Derived/model_with_rating_target/m_4d89d24e8679e7ec.sidx  
  inflating: ../Resources/Data/Derived/model_with_rating_target/m_4f5f00486f9c6a42.sidx  
  inflating: ../Resources/Data/Derived/model_with_rating_target/m_6271ee7d449ef0a0.0000  
  inflating: ../Resources/Data/Derived/model_with_rating_target/m_6271ee7d449ef0a0.frame_idx  
  inflating: ../Resources/Data/Derived/model_with_rating_target/m_6271ee7d449ef0a0.sidx  
  inflating: ../Resources/Data/Derived/model_with_rating_target/m_ecca79d3ffa9d7f4.0000  
  inflating: ../Resources/Data/Derived/model_with_rating_target/m_ecca79d3ffa9d7f4.frame_idx  
  inflating: ../Resources/Da

In [12]:
model_with_rating_target = gl.load_model("../Resources/Data/Derived/model_with_rating_target")

# View Data -- 

# A) For an interactive view - Click on the url outputed below

In [15]:
interactive_overview = model_with_rating_target.views.overview(validation_set=validation_data,item_data=items,item_name_column='title',item_url_column='imdbPictureURL')

In [16]:
interactive_overview.uri

'http://localhost:32212/view/20d65e5a-d587-43d3-a168-8484a2dcebb5'

In [17]:
interactive_overview.show()

<img src="../Visualizations/Ranking_Factorization_Recommender_sample_outputA.png" style="width: 600px"/>
<img src="../Visualizations/Ranking_Factorization_Recommender_sample_outputB.png" style="width: 600px"/>
<img src="../Visualizations/Ranking_Factorization_Recommender_sample_outputC.png" style="width: 600px"/>

# B) Exploring other model results...

In [39]:
print model_with_rating_target.recommend([245],k=5).join(items[["movieId","title","genres"]], on="movieId").sort("rank")

+--------+---------+---------------+------+-------------------------------+
| userId | movieId |     score     | rank |             title             |
+--------+---------+---------------+------+-------------------------------+
|  245   |   2571  | 5.69516382782 |  1   |       Matrix, The (1999)      |
|  245   |   110   | 5.49247453777 |  2   |       Braveheart (1995)       |
|  245   |   589   |  5.4849626836 |  3   | Terminator 2: Judgment Day... |
|  245   |   318   | 5.36152295439 |  4   | Shawshank Redemption, The ... |
|  245   |   356   | 5.33043525784 |  5   |      Forrest Gump (1994)      |
+--------+---------+---------------+------+-------------------------------+
+--------------------------+
|          genres          |
+--------------------------+
|  Action|Sci-Fi|Thriller  |
|     Action|Drama|War     |
|      Action|Sci-Fi       |
|       Crime|Drama        |
| Comedy|Drama|Romance|War |
+--------------------------+
[5 rows x 6 columns]



In [40]:
model_with_rating_target.recommend([110],k=20).join(items[["movieId","title","genres"]], on="movieId").sort("rank")

userId,movieId,score,rank,title,genres
110,1207,5.10486749975,1,To Kill a Mockingbird (1962) ...,Drama
110,2501,5.02713600962,2,October Sky (1999),Drama
110,914,4.99386928646,3,My Fair Lady (1964),Comedy|Drama|Musical|Roma nce ...
110,1961,4.95985041229,4,Rain Man (1988),Drama
110,527,4.91736112206,5,Schindler's List (1993),Drama|War
110,1307,4.90353677837,6,When Harry Met Sally... (1989) ...,Comedy|Romance
110,1234,4.88152490227,7,"Sting, The (1973)",Comedy|Crime
110,954,4.8557215151,8,Mr. Smith Goes to Washington (1939) ...,Drama
110,1225,4.85325650541,9,Amadeus (1984),Drama
110,2324,4.85189829437,10,Life Is Beautiful (La Vita è bella) (1997) ...,Comedy|Drama|Romance|War


In [41]:
model_with_rating_target.get_similar_users([40])

userId,similar,score,rank
40,171902,0.795863330364,1
40,40312,0.793588101864,2
40,180521,0.790434002876,3
40,53034,0.787391364574,4
40,197066,0.775171935558,5
40,92211,0.772530674934,6
40,194059,0.768670737743,7
40,84204,0.764811038971,8
40,52684,0.762292563915,9
40,182139,0.758243501186,10


movieId,title,genres,imdbId,tmdbId,X1,imdbPictureURL
1,Toy Story (1995),Adventure|Animation|Child ren|Comedy|Fantasy ...,114709,862,0,http://ia.media-imdb.com/ images/M/MV5BMTMwNDU0 ...
2,Jumanji (1995),Adventure|Children|Fantas y ...,113497,8844,1,http://ia.media-imdb.com/ images/M/MV5BMzM5NjE1 ...
3,Grumpier Old Men (1995),Comedy|Romance,113228,15602,2,http://ia.media-imdb.com/ images/M/MV5BMTI5MTgy ...
4,Waiting to Exhale (1995),Comedy|Drama|Romance,114885,31357,3,http://ia.media-imdb.com/ images/M/MV5BMTczMTMy ...
5,Father of the Bride Part II (1995) ...,Comedy,113041,11862,4,http://ia.media-imdb.com/ images/M/MV5BMTg1NDc2 ...
6,Heat (1995),Action|Crime|Thriller,113277,949,5,http://ia.media-imdb.com/ images/M/MV5BMTM1NDc4 ...
7,Sabrina (1995),Comedy|Romance,114319,11860,6,http://ia.media-imdb.com/ images/M/MV5BMTYyNDM1 ...
8,Tom and Huck (1995),Adventure|Children,112302,45325,7,http://ia.media-imdb.com/ images/M/MV5BMTUxNDYz ...
9,Sudden Death (1995),Action,114576,9091,8,http://ia.media-imdb.com/ images/M/MV5BMTcwMTU2 ...
10,GoldenEye (1995),Action|Adventure|Thriller,113189,710,9,http://ia.media-imdb.com/ images/M/MV5BNTE1OTEx ...


In [43]:
model_with_rating_target.recommend([110],k=20).join(items[["movieId","title","genres"]], on="movieId").sort("rank")

userId,movieId,score,rank,title,genres
110,1207,5.10486749975,1,To Kill a Mockingbird (1962) ...,Drama
110,2501,5.02713600962,2,October Sky (1999),Drama
110,914,4.99386928646,3,My Fair Lady (1964),Comedy|Drama|Musical|Roma nce ...
110,1961,4.95985041229,4,Rain Man (1988),Drama
110,527,4.91736112206,5,Schindler's List (1993),Drama|War
110,1307,4.90353677837,6,When Harry Met Sally... (1989) ...,Comedy|Romance
110,1234,4.88152490227,7,"Sting, The (1973)",Comedy|Crime
110,954,4.8557215151,8,Mr. Smith Goes to Washington (1939) ...,Drama
110,1225,4.85325650541,9,Amadeus (1984),Drama
110,2324,4.85189829437,10,Life Is Beautiful (La Vita è bella) (1997) ...,Comedy|Drama|Romance|War


In [52]:
validation_data.shape

(17660, 4)

In [53]:
model_with_rating_target.get_current_options

<bound method RankingFactorizationRecommender.get_current_options of Class                            : RankingFactorizationRecommender

Schema
------
User ID                          : userId
Item ID                          : movieId
Target                           : rating
Additional observation features  : 1
User side features               : []
Item side features               : []

Statistics
----------
Number of observations           : 24386436
Number of users                  : 259130
Number of items                  : 39443

Training summary
----------------
Training time                    : 1952.7569

Model Parameters
----------------
Model class                      : RankingFactorizationRecommender
num_factors                      : 32
binary_target                    : 0
side_data_factorization          : 1
solver                           : auto
nmf                              : 0
max_iterations                   : 25

Regularization Settings
-----------------------
r

In [56]:
model_with_rating_target.get_similar_items(items = [1])

movieId,similar,score,rank
1,3114,0.946319937706,1
1,2355,0.878918349743,2
1,588,0.86337095499,3
1,4886,0.809542238712,4
1,2687,0.801599025726,5
1,595,0.79145103693,6
1,34,0.78594905138,7
1,6377,0.781840920448,8
1,364,0.780150532722,9
1,1566,0.761427342892,10


In [57]:
model_with_rating_target.evaluate_rmse

<bound method RankingFactorizationRecommender.evaluate_rmse of Class                            : RankingFactorizationRecommender

Schema
------
User ID                          : userId
Item ID                          : movieId
Target                           : rating
Additional observation features  : 1
User side features               : []
Item side features               : []

Statistics
----------
Number of observations           : 24386436
Number of users                  : 259130
Number of items                  : 39443

Training summary
----------------
Training time                    : 1952.7569

Model Parameters
----------------
Model class                      : RankingFactorizationRecommender
num_factors                      : 32
binary_target                    : 0
side_data_factorization          : 1
solver                           : auto
nmf                              : 0
max_iterations                   : 25

Regularization Settings
-----------------------
regular

In [58]:
model_with_rating_target.coefficients

{'intercept': 3.526573788806094, 'movieId': Columns:
 	movieId	int
 	linear_terms	float
 	factors	array
 
 Rows: 39443
 
 Data:
 +---------+-----------------+-------------------------------+
 | movieId |   linear_terms  |            factors            |
 +---------+-----------------+-------------------------------+
 |   122   | -0.353523820639 | [-0.0517144910991, 0.18032... |
 |   172   | -0.190083727241 | [-0.237612366676, 0.018372... |
 |   1221  |  1.16451227665  | [-0.612521648407, 0.140487... |
 |   1441  |  0.405694097281 | [0.0602260418236, -0.03495... |
 |   1609  | -0.512943506241 | [-0.16179767251, 0.1926081... |
 |   1961  |  1.06061267853  | [0.236830979586, 0.1420370... |
 |   1972  |  -1.04974246025 | [-0.0234307218343, 0.08764... |
 |   441   |  0.459097623825 | [-0.60194671154, 0.4550953... |
 |   494   |   0.1333322227  | [0.0111932847649, 0.063164... |
 |   1193  |  1.25358653069  | [-0.559530854225, 0.007200... |
 +---------+-----------------+-----------------------

In [59]:
model_precision_recall = model_with_rating_target.evaluate_precision_recall(validation_data)

In [60]:
model_precision_recall.keys()

['precision_recall_overall', 'precision_recall_by_user']

In [61]:
model_precision_recall["precision_recall_overall"]

cutoff,precision,recall
1,0.177242888403,0.0138150172991
2,0.161378555799,0.0228827882131
3,0.149525893508,0.0297721636602
4,0.139223194748,0.0345174241112
5,0.126039387309,0.0393999309872
6,0.121808898614,0.0462916841104
7,0.115817442951,0.0522941196499
8,0.111187089716,0.0567517202495
9,0.108193532701,0.0610610374904
10,0.105470459519,0.0647670513689


In [62]:
model_precision_recall["precision_recall_by_user"]["cutoff" == 1]
#.groupby("userId", gl.aggregate.ARGMAX)

{'count': 23, 'cutoff': 1, 'precision': 0.0, 'recall': 0.0, 'userId': 1004}