<a href="https://colab.research.google.com/github/willystw/fastai-learning/blob/bear-multi-classification/%20movie-recommender/movie_recommender.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#hide
!pip install -Uqq fastbook
import fastbook
fastbook.setup_book()

[K     |████████████████████████████████| 720 kB 6.2 MB/s 
[K     |████████████████████████████████| 46 kB 5.2 MB/s 
[K     |████████████████████████████████| 1.2 MB 35.9 MB/s 
[K     |████████████████████████████████| 186 kB 50.7 MB/s 
[K     |████████████████████████████████| 56 kB 4.8 MB/s 
[K     |████████████████████████████████| 51 kB 380 kB/s 
[?25hMounted at /content/gdrive


In [2]:
#hide
from fastbook import *

In [3]:
from fastai.collab import *
from fastai.tabular.all import *
# Local folder for the MovieLens 100k archive (URLs.ML_100k); the cells below
# read `u.data` and `u.item` from it.
path = untar_data(URLs.ML_100k)

In [4]:
# u.data is read as a tab-separated file with no header row, so the column
# names are supplied explicitly.
ratings = pd.read_csv(
    path/'u.data',
    sep='\t',
    header=None,
    names=['user', 'movie', 'rating', 'timestamp'],
)
ratings.head(10)

Unnamed: 0,user,movie,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596
5,298,474,4,884182806
6,115,265,2,881171488
7,253,465,5,891628467
8,305,451,3,886324817
9,6,86,3,883603013


In [5]:
# u.item is read as a pipe-separated, latin-1 file with no header row; only the
# first two columns are kept and named `movie` (id) and `title`.
movies = pd.read_csv(
    path/'u.item',
    sep='|',
    encoding='latin-1',
    header=None,
    usecols=[0, 1],
    names=['movie', 'title'],
)
movies.head(10)

Unnamed: 0,movie,title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)
5,6,Shanghai Triad (Yao a yao yao dao waipo qiao) (1995)
6,7,Twelve Monkeys (1995)
7,8,Babe (1995)
8,9,Dead Man Walking (1995)
9,10,Richard III (1995)


In [7]:
# No `on=` is given, so pandas joins on the column names the two frames share.
# This keeps the cell safe to re-run: once `title` is present in `ratings`, it
# simply becomes part of the join key.
ratings = pd.merge(ratings, movies)
ratings.head()

Unnamed: 0,user,movie,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,63,242,3,875747190,Kolya (1996)
2,226,242,5,883888671,Kolya (1996)
3,154,242,3,879138235,Kolya (1996)
4,306,242,5,876503793,Kolya (1996)


In [8]:
# Build the DataLoaders from the merged frame, using the human-readable
# `title` column as the item and a batch size of 64.
dls = CollabDataLoaders.from_df(
    ratings,
    item_name='title',
    bs=64,
)
dls.show_batch()

Unnamed: 0,user,title,rating
0,542,My Left Foot (1989),4
1,422,Event Horizon (1997),3
2,311,"African Queen, The (1951)",4
3,595,Face/Off (1997),4
4,617,Evil Dead II (1987),1
5,158,Jurassic Park (1993),5
6,836,Chasing Amy (1997),3
7,474,Emma (1996),3
8,466,Jackie Chan's First Strike (1996),3
9,554,Scream (1996),3


In [None]:
# fastai's ready-made collaborative-filtering learner on `dls`, with 50 factors.
# y_range=(0, 5.5) is presumably the bound applied to predicted ratings, as in
# the hand-written models further down — confirm against the fastai docs.
learn = collab_learner(dls, n_factors=50, y_range=(0, 5.5))

In [None]:
# Five epochs of one-cycle training, peak learning rate 5e-3, weight decay 0.1.
learn.fit_one_cycle(n_epoch=5, lr_max=5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,0.913259,0.943922,00:11
1,0.817469,0.873518,00:11
2,0.733714,0.829292,00:11
3,0.581267,0.816393,00:11
4,0.479274,0.817592,00:11


In [None]:
# Same data and schedule as the previous learner, but with `use_nn=True` and
# `layers=[100, 50]` so fastai builds its neural-network variant.
learn = collab_learner(dls, use_nn=True, layers=[100, 50], y_range=(0, 5.5))
learn.fit_one_cycle(n_epoch=5, lr_max=5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,0.995056,0.985757,00:15
1,0.959629,0.906348,00:16
2,0.868694,0.877667,00:16
3,0.842771,0.852472,00:15
4,0.736606,0.854832,00:15


In [19]:
# Vocabulary sizes of the two categorical columns as recorded by the DataLoaders.
num_users, num_movies = (len(dls.classes[col]) for col in ('user', 'title'))

In [12]:
class DotProduct(Module):
  """Score a (user, movie) pair as the dot product of their embeddings.

  Args:
    n_movies: number of rows in the movie embedding table.
    n_users: number of rows in the user embedding table.
    n_factors: width of both embedding tables.
    y_range: (low, high) bounds handed to `sigmoid_range` for the output.

  Note that `n_movies` comes before `n_users` in the signature even though
  `forward` reads the user index from column 0 and the movie index from
  column 1 of its input.
  """
  def __init__(self, n_movies, n_users, n_factors, y_range=(0,5.5)):
    self.y_range = y_range
    # The user table is created before the movie table, as in the original.
    self.users_factors = Embedding(n_users, n_factors)
    self.movies_factors = Embedding(n_movies, n_factors)

  def forward(self, x):
    """Return one bounded score per row of `x` (column 0: user, column 1: movie)."""
    user_idx, movie_idx = x[:,0], x[:,1]
    dot = (self.users_factors(user_idx) * self.movies_factors(movie_idx)).sum(dim=1)
    return sigmoid_range(dot, *self.y_range)

In [17]:
class DotProductWithBias(Module):
  """Dot-product model with an extra learned bias per user and per movie.

  Args:
    n_movies: number of rows in the movie factor and movie bias tables.
    n_users: number of rows in the user factor and user bias tables.
    n_factors: width of the two factor tables (the bias tables have width 1).
    y_range: (low, high) bounds handed to `sigmoid_range` for the output.

  As in `DotProduct`, `n_movies` precedes `n_users` in the signature while the
  input to `forward` is user-first.
  """
  def __init__(self, n_movies, n_users, n_factors, y_range=(0,5.5)):
    self.y_range = y_range
    # Table creation order is unchanged: user factors, user bias,
    # movie factors, movie bias.
    self.users_factors = Embedding(n_users, n_factors)
    self.users_bias = Embedding(n_users, 1)
    self.movies_factors = Embedding(n_movies, n_factors)
    self.movies_bias = Embedding(n_movies, 1)

  def forward(self, x):
    """Return bounded scores for rows of `x` (column 0: user, column 1: movie).

    The dot product keeps its reduced dimension (`keepdim=True`) so it can be
    added to the width-1 bias lookups.
    """
    user_idx, movie_idx = x[:,0], x[:,1]
    interaction = (self.users_factors(user_idx) * self.movies_factors(movie_idx)).sum(dim=1, keepdim=True)
    # The two biases are summed first and then added to the interaction, the
    # same association order as the original in-place update.
    bias = self.users_bias(user_idx) + self.movies_bias(movie_idx)
    return sigmoid_range(interaction + bias, *self.y_range)

In [22]:
# Train the hand-written dot-product model. Keyword arguments are used because
# the constructor takes the movie count before the user count.
model = DotProduct(n_movies=num_movies, n_users=num_users, n_factors=50)
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(n_epoch=5, lr_max=5e-3)

epoch,train_loss,valid_loss,time
0,1.010132,0.999164,00:08
1,0.872788,0.9185,00:08
2,0.703211,0.879761,00:08
3,0.470672,0.882529,00:08
4,0.372899,0.887957,00:08


In [25]:
# Bias model trained without passing `wd`, i.e. with whatever weight decay the
# library applies by default.
model_bias = DotProductWithBias(n_movies=num_movies, n_users=num_users, n_factors=50)
learn = Learner(dls, model_bias, loss_func=MSELossFlat())
learn.fit_one_cycle(n_epoch=5, lr_max=5e-3)

epoch,train_loss,valid_loss,time
0,0.915965,0.940251,00:09
1,0.837371,0.873567,00:09
2,0.605706,0.874913,00:09
3,0.408935,0.898367,00:09
4,0.284846,0.904894,00:09


In [23]:
# Fresh bias model, same schedule as the previous cell but with an explicit
# weight decay of 0.1.
model_bias = DotProductWithBias(n_movies=num_movies, n_users=num_users, n_factors=50)
learn = Learner(dls, model_bias, loss_func=MSELossFlat())
learn.fit_one_cycle(n_epoch=5, lr_max=5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,0.9271,0.951742,00:09
1,0.83638,0.882923,00:09
2,0.763345,0.83713,00:09
3,0.618523,0.821908,00:09
4,0.493908,0.822925,00:08


In [28]:
# Display the module tree of the model held by the most recently created learner.
learn.model

DotProductWithBias(
  (users_factors): Embedding(944, 50)
  (users_bias): Embedding(944, 1)
  (movies_factors): Embedding(1665, 50)
  (movies_bias): Embedding(1665, 1)
)

In [30]:
# Movie-bias table of the current learner's model, with its width-1 dimension
# squeezed away. This relies on `learn` still holding a DotProductWithBias
# model (the only model here with a `movies_bias` attribute).
movie_bias = learn.model.movies_bias.weight.squeeze()

In [32]:
# Indices of the five largest movie biases, in descending order ...
idxs = movie_bias.argsort(descending=True)[:5]
# ... mapped back to titles through the DataLoaders' `title` vocabulary.
[dls.classes['title'][i] for i in idxs]

['Titanic (1997)',
 'Shawshank Redemption, The (1994)',
 "Schindler's List (1993)",
 'Star Wars (1977)',
 'Silence of the Lambs, The (1991)']

In [38]:
# Embedding sizes suggested by fastai for `dls`. Judging by the recorded output
# and by how `CollabNN` unpacks them, this is one (rows, width) pair for users
# and one for titles.
embs = get_emb_sz(dls)
embs

[(944, 74), (1665, 102)]

In [45]:
# NOTE(review): `CollabNN` is defined a second time in the next cell, with the
# same behaviour but parameters named `user_sz`/`item_sz`. On a top-to-bottom
# run that later definition replaces this one; keep only one of the two.
class CollabNN(Module):
  """Rate a (user, item) pair with a small MLP over their joined embeddings.

  Args:
    user_size: arguments unpacked into `Embedding` for users; element 1 is
      used as the user embedding width.
    item_size: same, for items.
    y_range: (low, high) bounds handed to `sigmoid_range` for the output.
    n_act: number of units in the single hidden layer.
  """
  def __init__(self, user_size, item_size, y_range=(0,5.5), n_act = 100):
    self.user_factors = Embedding(*user_size)
    self.item_factors = Embedding(*item_size)
    self.y_range = y_range
    # The MLP input is the two embeddings side by side.
    joined_width = user_size[1] + item_size[1]
    self.layers = nn.Sequential(
        nn.Linear(joined_width, n_act),
        nn.ReLU(),
        nn.Linear(n_act, 1),
    )

  def forward(self, x):
    """Return bounded scores for rows of `x` (column 0: user, column 1: item)."""
    user_vecs = self.user_factors(x[:,0])
    item_vecs = self.item_factors(x[:,1])
    joined = torch.cat((user_vecs, item_vecs), dim=1)
    return sigmoid_range(self.layers(joined), *self.y_range)

In [43]:
# NOTE(review): this repeats the `CollabNN` class from the previous cell (same
# behaviour, parameters named `user_sz`/`item_sz` here). Whichever cell runs
# last wins; keep only one of the two.
class CollabNN(Module):
    """Small MLP over joined user and item embeddings, bounded to `y_range`.

    Args:
        user_sz: arguments unpacked into `Embedding` for users; element 1 is
            used as the user embedding width.
        item_sz: same, for items.
        y_range: (low, high) bounds handed to `sigmoid_range` for the output.
        n_act: number of units in the single hidden layer.
    """
    def __init__(self, user_sz, item_sz, y_range=(0,5.5), n_act=100):
        self.user_factors = Embedding(*user_sz)
        self.item_factors = Embedding(*item_sz)
        # First linear layer takes both embeddings joined along the feature axis.
        in_features = sum(sz[1] for sz in (user_sz, item_sz))
        hidden = nn.Linear(in_features, n_act)
        head = nn.Linear(n_act, 1)
        self.layers = nn.Sequential(hidden, nn.ReLU(), head)
        self.y_range = y_range

    def forward(self, x):
        """Return bounded scores for rows of `x` (column 0: user, column 1: item)."""
        joined = torch.cat([self.user_factors(x[:,0]), self.item_factors(x[:,1])], dim=1)
        raw = self.layers(joined)
        return sigmoid_range(raw, *self.y_range)

In [48]:
# Train the hand-written neural model; `embs` supplies the user and item
# embedding sizes in that order.
nn_model = CollabNN(*embs)
learn = Learner(dls, nn_model, loss_func=MSELossFlat())
learn.fit_one_cycle(n_epoch=5, lr_max=5e-3, wd=0.01)

epoch,train_loss,valid_loss,time
0,0.932937,0.97137,00:10
1,0.8925,0.928661,00:11
2,0.843213,0.877523,00:11
3,0.813533,0.86229,00:11
4,0.714689,0.864303,00:11
