<a href="https://colab.research.google.com/github/yuyan2000/Online-Learning-Moive-RecSys/blob/main/MoiveRecSys.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Movie Recommender System
This project aims to build a movie recommender system. A new user first completes a short survey so that the system can learn their preferences, and the system then gives recommendations. Having to recommend to a user with no history is known as the cold-start problem.

Three approaches are presented for this task:
- Offline learning
- Online learning - LogisticRegression with epsilon-greedy
- Online learning - Contextual bandits

In [16]:
import random
import numpy as np
import pandas as pd
from base64 import b64decode
from IPython import display
from ipywidgets import AppLayout, Button, GridspecLayout, Image, Layout 
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MultiLabelBinarizer
from scipy.optimize import minimize

In [2]:
%%javascript
// Replace the output area's scroll predicate with one that always returns false
// (presumably so that long outputs such as poster grids are shown in full -- TODO confirm on the target front-end).
IPython.OutputArea.prototype._should_scroll = function(lines) {return false;}

<IPython.core.display.Javascript object>

In [3]:
# Mount Google Drive at /content/drive; the movie database is read from a path
# under this mount point later in the notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Utils

In [4]:
def getPoster(index, dataset):
  """
  Build the image widget showing the poster of one movie.

  `index` is a row label of `dataset`; the `poster` field of that row is
  used as the image content.
  """
  poster_bytes = dataset.loc[index].poster
  poster_height = 400*2//3
  return Image(value=poster_bytes, format='jpg', width=200, height=poster_height)

def plotGrid(df, n_col=3):
  """
  Lay out the posters of all rows of `df` in a grid of `n_col` columns.

  The grid is filled row by row, following the order of `df.index`, and
  always has at least one row (even when `df` is empty).
  """
  n_items = len(df)
  n_row = max(-(-n_items // n_col), 1)  # ceiling division, never below 1
  grid = GridspecLayout(n_row, n_col)
  for position in range(n_items):
    # position -> (row, column) in row-major order
    grid[divmod(position, n_col)] = getPoster(df.index[position], df)
  return grid

def createButton(description, button_style):
  """
  Create a button widget with the given label and style, using automatic
  height and width.
  """
  auto_layout = Layout(height='auto', width='auto')
  return Button(description=description, button_style=button_style, layout=auto_layout)

In [5]:
def updateLikes(button):
  """
  Record the user's answer for the movie currently displayed.

  The "like" value of row `idx_current` (global) of `train_movies` is set to
  True when the clicked button is labelled 'Like', and to False otherwise.
  """
  train_movies.loc[idx_current, 'like'] = (button.description == 'Like')
   
def updatePoster(idx):
    """
    Make `idx` the current movie and display its poster.

    Updates the global `idx_current` and replaces the content of the global
    image widget `img` with the poster stored in `train_movies`.
    """
    global idx_current

    idx_current = idx
    img.value = train_movies.loc[idx, 'poster']

In [6]:
def selectRandom():
  """
  Select randomly a movie that the user has not rated yet.

  Returns:
    The index label, in `train_movies`, of a movie whose "like" value is
    still missing.

  Raises:
    ValueError: if every movie of `train_movies` has already been rated.
  """
  # Draw among the labels of the unrated rows. Unlike drawing an integer in
  # [0, n-1] and retrying, this works for any index (not only 0..n-1) and
  # cannot loop forever once every movie has been rated.
  unseen = train_movies.index[train_movies['like'].isna()]
  if len(unseen) == 0:
    raise ValueError('no unseen movie left to select')
  return unseen[random.randrange(len(unseen))]

def giveRecommendation(X_train, X_test, y_train):
  """
  Rank the test movies by predicted probability of being liked.

  A logistic regression (without intercept) is fitted on the rows of
  `X_train` whose label in `y_train` is not missing, provided both classes
  are present; otherwise every test movie gets a score of 0.

  Side effect: the "prediction" column of the global `test_movies` is
  replaced (inserted as first column) with the new scores.

  Returns `test_movies` sorted by decreasing prediction.
  """
  labelled = ~y_train.isna()
  labels = y_train.dropna().values

  if len(np.unique(labels)) != 2:
    # cannot train without at least one like and one dislike
    scores = np.zeros(X_test.shape[0])
  else:
    classifier = LogisticRegression(fit_intercept=False)
    classifier.fit(X_train[labelled], list(labels))
    scores = classifier.predict_proba(X_test)[:, 1]

  test_movies.drop(columns=['prediction'], errors='ignore', inplace=True)
  test_movies.insert(loc=0, column='prediction', value=scores)

  return test_movies.sort_values(by='prediction', ascending=False)

In [7]:
# First we add a label to the test set X_test : ranking based on our preferences (from 1 to 29)

# ranking = [10,29,3,16,8,12,4,11,6,7,18,19,14,20,5,22,21,1,9,17,28,23,24,15,25,26,27,2,13]
# ranksD = pd.concat([pd.DataFrame(test_movies.loc[:,'Title'].index),pd.DataFrame(test_movies.loc[:,'Title']).set_axis(range(29),axis=0),pd.Series(ranking)],axis=1).set_axis(["ID","Title","Rank"],axis=1)
# ranksD = ranksD.set_index('ID')
# ranksD = ranksD.sort_values('Rank')

def testMovies(probs):
    """
    Compare a predicted ranking of the test movies with a reference ranking.

    `probs` is a DataFrame with a 'Title' column whose row order is the
    predicted ranking (first row = rank 1) and whose index holds the movie IDs.
    The function is hard-wired to exactly 29 movies (ranks 1..29).

    The reference ranking is read from the global `ranksD` (indexed by movie
    ID, with a 'Rank' column). NOTE(review): the only definition of `ranksD`
    in this notebook is commented out, so it must be restored before calling.

    Prints one line per movie and returns the sum of absolute rank
    differences divided by 29 twice.
    """
    probs = probs.loc[:,'Title']
    # side-by-side table: title, movie ID and predicted rank (1..29), aligned by position
    probs = pd.concat([probs.set_axis(range(1,30),axis=0),pd.DataFrame(probs.index).set_axis(range(1,30),axis=0),pd.DataFrame(range(1,30)).set_axis(range(1,30),axis=0)],axis=1)
    probs = probs.set_axis(['Title','ID','Rank'],axis=1)
    probs = probs.set_index('ID')
  
    loss = 0

    for i,id in enumerate(probs.index):
        # after set_index('ID'), column 1 is 'Rank', i.e. the predicted rank of this movie
        print("Predicted number",i+1,":",id,probs.loc[id,'Title'], "| Real number :", ranksD.loc[id,'Rank'], "diff :", np.abs(probs.iloc[i,1] - ranksD.loc[id,'Rank']))
        loss = loss +  np.abs(probs.iloc[i,1] - ranksD.loc[id,'Rank']) / 29
    return loss/29

# Data Preprocessing

In [8]:
# Load the movie database from the mounted Google Drive.
# NOTE(review): unpickling can execute arbitrary code -- only load a file from a trusted source.
df = pd.read_pickle('/content/drive/MyDrive/IA318/movie_database.pickle')
print('number of items: ', len(df))
df.head()

number of items:  1037


Unnamed: 0,Actors,Awards,Country,Director,Genre,Language,Rated,Released,Title,imdbID,imdbRating,Metascore,Box_office,imdbVotes,Runtime,poster
0,"[Mark Hamill, Harrison Ford, Carrie Fisher, Bi...",Won 1 Oscar. Another 15 wins & 18 nominations.,[USA],[Irvin Kershner],"[Action, Adventure, Fantasy]",[English],[PG],1980-06-20,Star Wars: Episode V - The Empire Strikes Back,tt0080684,8.8,79.0,290158751.0,799579.0,124.0,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
1,"[Kareem Abdul-Jabbar, Lloyd Bridges, Peter Gra...",Nominated for 1 Golden Globe. Another 2 wins &...,[USA],"[Jim Abrahams, David Zucker, Jerry Zucker]",[Comedy],[English],[PG],1980-07-02,Airplane!,tt0080339,7.8,,83400000.0,154994.0,88.0,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
2,"[John Belushi, Dan Aykroyd, James Brown, Cab C...",1 win.,[USA],[John Landis],"[Action, Comedy, Crime]",[English],[R],1980-06-20,The Blues Brothers,tt0080455,7.9,,54200000.0,138196.0,133.0,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
3,"[Jack Nicholson, Shelley Duvall, Danny Lloyd, ...",3 wins & 5 nominations.,"[USA, UK]",[Stanley Kubrick],"[Drama, Horror]",[English],[R],1980-05-23,The Shining,tt0081505,8.4,61.0,,584323.0,146.0,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
4,"[Anthony Hopkins, John Hurt, Anne Bancroft, Jo...",Nominated for 8 Oscars. Another 10 wins & 14 n...,"[USA, UK]",[David Lynch],"[Biography, Drama]",[English],[PG],1980-10-10,The Elephant Man,tt0080678,8.2,,,156572.0,124.0,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...


In [9]:
# list-valued columns, turned into one indicator column per distinct value
CAT_FEATURES = ["Actors", "Country", "Director", "Genre", "Rated"]

# numerical columns, each scaled by its maximum
NUM_FEATURES = ["imdbRating", "Metascore", "Box_office", "imdbVotes"]


# split by release year: before 2015 for the survey, 2015 for the recommendations
released_year = df.Released.dt.year
train_movies = df[released_year < 2015].assign(tag='train')
test_movies = df[released_year == 2015].assign(tag='test')

# encode train and test together so that both share the same feature columns
data = pd.concat([train_movies, test_movies])

numeric_part = data[NUM_FEATURES]
feature_blocks = [numeric_part / numeric_part.max()]
for feature in CAT_FEATURES:
  mlb = MultiLabelBinarizer()
  indicators = mlb.fit_transform(data[feature])
  feature_blocks.append(pd.DataFrame(indicators, columns=mlb.classes_, index=data.index))

# missing numerical values are replaced by 0
X = pd.concat(feature_blocks, axis=1).fillna(0)

X_train = X[data['tag'] == 'train']
X_test = X[data['tag'] == 'test']

# Offline learning
In offline learning, the recommender system starts learning only after the user has made a series of choices. This means that, during the survey, it proposes movies at random rather than based on the user's previous preferences.

## Survey

In [27]:
# seed Python's RNG so that the sequence of randomly proposed movies is reproducible
random.seed(84)

# add or reset the column "like", which records the user's actions (None = not rated yet)
train_movies = train_movies.assign(like=None)
test_movies = test_movies.assign(like=None)

left_button = createButton('Dislike', 'danger')  # Dislike button
right_button = createButton('Like', 'success')  # Like button

# first movie shown; `idx_current` and `img` are the globals used by updateLikes / updatePoster
idx_current = selectRandom()
img = getPoster(idx_current, train_movies)  # movie poster

# set the on_click function to the button
def updateMovie(button):
  """Store the answer for the current movie, then show another random unseen movie."""
  updateLikes(button)
  idx_new = selectRandom()
  updatePoster(idx_new)
  
left_button.on_click(updateMovie)
right_button.on_click(updateMovie)

# survey widget: Dislike button | poster | Like button
AppLayout(
    left_sidebar=left_button,
    center=img,
    right_sidebar=right_button)

AppLayout(children=(Button(button_style='danger', description='Dislike', layout=Layout(grid_area='left-sidebar…

In [32]:
# keep only the movies rated during the survey
viewed_movies = train_movies[~train_movies['like'].isna()]

print('movie like:')
# posters of the liked movies
plotGrid(viewed_movies[viewed_movies.like==True], n_col=5)

movie like:


GridspecLayout(children=(Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xf…

In [33]:
print('movie dislike:')
# posters of the disliked movies
plotGrid(viewed_movies[viewed_movies.like==False], n_col=5)

movie dislike:


GridspecLayout(children=(Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xf…

## Ranking Test Movies

In [39]:
# labels collected during the survey (None for the movies not rated)
y_train = train_movies.like

test_movies = test_movies.drop(columns=['prediction'], errors='ignore')  # reset the column "prediction"
# test movies sorted by decreasing predicted probability of like
ranked_movies = giveRecommendation(X_train, X_test, y_train)
# print("Loss :", testMovies(ranked_movies))

In [36]:
# posters of the test movies, highest prediction first
plotGrid(ranked_movies, n_col=5)

GridspecLayout(children=(Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xf…

# Online learning - LogisticRegression with epsilon-greedy
In online learning with an $\epsilon$-greedy policy, the recommender system not only exploits the user's previous preferences, but also explores by random selection. This allows it to refine the known preferences of the user while continuing to learn their preferences in other domains of movies.

## Survey

In [10]:
def selectGreedy():
  """
  Select the next movie to show with an epsilon-greedy policy.

  An unseen movie is drawn at random (exploration) when a random draw falls
  below the global `eps`, or while the user has not given at least one like
  and one dislike. Otherwise a logistic regression is fitted on the rated
  movies and the unseen movie with the highest predicted probability of
  like is selected (exploitation).

  Returns:
    The index label of the selected movie in `train_movies`.

  Raises:
    ValueError: if every movie of `train_movies` has already been rated.
  """
  random_value = random.random()
  y_train = train_movies.like  # user's actions
  rated = y_train.notna()

  if rated.all():
    raise ValueError('no unseen movie left to select')

  # if (not at least 1 like and 1 dislike) or (random value < eps)
  if len(np.unique(y_train[rated].values)) != 2 or random_value < eps:
    return selectRandom()

  X_unseen = X_train[~rated]  # movies with no user action yet

  model = LogisticRegression(fit_intercept=False)
  model.fit(X_train[rated], list(y_train[rated].values))
  probs = model.predict_proba(X_unseen)[:,1]

  # np.argmax is a position inside the unseen subset: convert it to the row
  # label, since the caller uses the result with train_movies.loc[...]
  return X_unseen.index[np.argmax(probs)]

In [None]:
# seed Python's RNG, which drives both the random selection and the epsilon draw
random.seed(84)
# exploration rate read by selectGreedy: a random movie is proposed when random.random() < eps
eps = 0.5

In [11]:
# add or reset the column "like", which records the user's actions (None = not rated yet)
train_movies = train_movies.assign(like=None)
test_movies = test_movies.assign(like=None)

left_button = createButton('Dislike', 'danger')  # Dislike button
right_button = createButton('Like', 'success')  # Like button

# first movie shown; `idx_current` and `img` are the globals used by updateLikes / updatePoster
idx_current = selectRandom()
img = getPoster(idx_current, train_movies)  # movie poster


# set the on_click function to the button
def updateMovie(button):
  """Store the answer for the current movie, then show the movie chosen by selectGreedy."""
  updateLikes(button)
  idx_new = selectGreedy()
  updatePoster(idx_new)

left_button.on_click(updateMovie)
right_button.on_click(updateMovie)

# survey widget: Dislike button | poster | Like button
AppLayout(
  left_sidebar=left_button,
  center=img,
  right_sidebar=right_button)

AppLayout(children=(Button(button_style='danger', description='Dislike', layout=Layout(grid_area='left-sidebar…

In [12]:
# keep only the movies rated during the survey
viewed_movies = train_movies[~train_movies['like'].isna()]

print('movie like:')
# posters of the liked movies
plotGrid(viewed_movies[viewed_movies.like==True], n_col=5)

movie like:


GridspecLayout(children=(Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xf…

In [13]:
print('movie dislike:')
# posters of the disliked movies
plotGrid(viewed_movies[viewed_movies.like==False], n_col=5)

movie dislike:


GridspecLayout(children=(Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xf…

## Ranking Test Movies

In [14]:
# labels collected during the survey (None for the movies not rated)
y_train = train_movies.like

test_movies = test_movies.drop(columns=['prediction'], errors='ignore')  # reset the column "prediction"
# test movies sorted by decreasing predicted probability of like
ranked_movies = giveRecommendation(X_train, X_test, y_train)
# print("Loss :", testMovies(ranked_movies))

In [15]:
# posters of the test movies, highest prediction first
plotGrid(ranked_movies, n_col=5)

GridspecLayout(children=(Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xf…

# Online learning - Contextual bandits
We use contextual bandits to make the survey more efficient. We use an algorithm inspired by **Thompson sampling**, proposed in the following paper (see Algorithm 3): [An Empirical Evaluation of Thompson Sampling](https://papers.nips.cc/paper/4321-an-empirical-evaluation-of-thompson-sampling.pdf) <br> 

It is a Bayesian logistic regression where the weights are Gaussian random variables, drawn from the posterior distribution, to enforce exploration. When the number of samples grows, the posterior distributions tend to concentrate and the weights converge to the optimal values.

More precisely, the *like* probability predicted for sample $x$ (a movie represented by its $d$ features) is:
$$
p(x) = \frac{1}{1 + e^{-w^Tx}}
$$
where $w$ is the vector of weights (of dimension $d$).

These weights $w_1,\ldots,w_d$ (one per feature) are assumed to be independent Gaussian random variables:
$$w_j \sim \mathcal{N}\left(m_j, \frac{\alpha}{q_j}\right)$$
where the parameters $m_j$ (the means) and $q_j$ (inversely proportional to the variances) are learned while $\alpha >0$ is a hyperparameter.

We initialize all parameters $q_j$ to some hyperparameter $\lambda$ (the higher $\lambda$, the lower the variance and thus the lower the exploration). Then given the sequence of samples $x_1,\ldots,x_n$, with labels $y_1,\ldots,y_n$ (the likes), the parameters are updated as follows:

1. Find the vector of weights $v$ minimizing $\frac{1}{2}\sum_{j=1}^{d} q_j(v_j - m_j)^2 + \sum_{i=1}^{n} \log(1 + \exp(-y_i v^T x_i))$, where the labels are encoded as $y_i \in \{-1, +1\}$ (like $= +1$, dislike $= -1$)
2. Update $m_j = v_j$ and $q_j = q_j + \sum_{i=1}^{n} x^2_{ij}p_i(1-p_i)$ where $p_i$ is the value predicted for sample $i$ using the vector of weights $v$:
$$p_i = \frac 1 {1 + e^{-v^Tx_i}}$$


To summarize, the variables are:
* $x_1,\ldots,x_n$: the contexts (features of the movies)
* $y_1,\ldots,y_n$: the labels (like or dislike)
* $w$: the vector of weights (random)
* $m$: the vector of means (to be learned)
* $q$: the vector inversely proportional to the variance (to be learned)



## Survey

In [19]:
class OnlineLogisticRegression:
  """
  Bayesian logistic regression with independent Gaussian weights, used for
  Thompson sampling.

  Each weight w_j follows N(m_j, alpha / q_j): `m` holds the means and `q`
  is inversely proportional to the variances. All q_j start at `lamb`.

  Labels may be given as booleans / 0-1 values or as -1/+1 values; they are
  always converted to -1/+1, the encoding required by the loss.

  Args:
    n_dim: number of features.
    lamb: initial value of every q_j (higher means less exploration).
    alpha: scaling of the variance of the sampled weights.
  """

  def __init__(self, n_dim, lamb=5, alpha=5):
    # hyperparameters
    self.lamb = lamb
    self.alpha = alpha

    # parameters of the model
    self.n_dim = n_dim
    self.m = np.zeros(self.n_dim)
    self.q = np.ones(self.n_dim) * self.lamb

    # weights, sampled from the initial distribution
    self.w = self.get_weights()

  @staticmethod
  def _prepare(X, y):
    """Return X as a float array and y as an array of -1.0 / +1.0 labels."""
    X = np.asarray(X, dtype=float)
    # True / 1 / +1 -> +1 ; False / 0 / -1 -> -1
    y = np.where(np.asarray(y, dtype=float) > 0, 1.0, -1.0)
    return X, y

  def loss(self, v, *args):
    """
    Objective minimized by `fit` for the weight vector `v`:
    1/2 * sum_j q_j (v_j - m_j)^2 + sum_i log(1 + exp(-y_i v^T x_i)).

    `args` is the pair (X, y).
    """
    X, y = self._prepare(*args)
    margins = y * X.dot(v)

    penalty = 0.5 * (self.q * (v - self.m) ** 2).sum()
    # logaddexp(0, -t) = log(1 + exp(-t)), without overflow for large |t|
    data_term = np.logaddexp(0, -margins).sum()
    return penalty + data_term

  def grad(self, v, *args):
    """Gradient of `loss` with respect to `v`; `args` is the pair (X, y)."""
    X, y = self._prepare(*args)
    margins = y * X.dot(v)

    # y_i / (1 + exp(y_i v^T x_i)), computed without overflow
    coeff = y * np.exp(-np.logaddexp(0, margins))
    return self.q * (v - self.m) - X.T.dot(coeff)

  def get_weights(self):
    """Draw one weight vector, w_j ~ N(m_j, alpha / q_j)."""
    # np.random.normal expects a standard deviation, hence the square root
    # of the variance alpha / q
    return np.random.normal(self.m, np.sqrt(self.alpha / self.q), size=self.n_dim)

  def fit(self, X, y):
    """
    Update the distribution of the weights from the samples `X` (one row per
    sample) and their labels `y`.

    Sets `self.v` and `self.m` to the minimizer of `loss` and increases
    `self.q` by sum_i x_ij^2 p_i (1 - p_i), where p_i is the probability
    predicted for sample i with the weights `self.v`.
    """
    X, y = self._prepare(X, y)

    # find v, starting from the last sampled weights
    self.v = minimize(
        self.loss,
        self.w,
        args=(X, y),
        jac=self.grad,
        method="L-BFGS-B",
        options={'maxiter': 20}
    ).x

    # update m and q
    self.m = self.v

    p = np.exp(-np.logaddexp(0, -X.dot(self.v)))  # sigmoid(v^T x_i)
    self.q = self.q + (X ** 2).T.dot(p * (1 - p))

  def predict_proba(self, X, mode='sample'):
    """
    Predict the probabilities of dislike and like for the rows of `X`.

    Args:
      X: samples, one row per sample.
      mode: 'sample' to use weights drawn from the current distribution
        (Thompson sampling), 'expected' to use the means `m`.

    Returns:
      Array with one row per sample and two columns: [1 - p, p], where p is
      the predicted probability of like.

    Raises:
      ValueError: if `mode` is neither 'sample' nor 'expected'.
    """
    # sampling weights after update (also the starting point of the next fit)
    self.w = self.get_weights()

    # using weight depending on mode
    if mode == 'sample':
      w = self.w  # weights are samples of posteriors
    elif mode == 'expected':
      w = self.m  # weights are expected values of posteriors
    else:
      raise ValueError('mode not recognized!')

    # calculating probabilities
    scores = np.asarray(X, dtype=float).dot(w)
    proba = np.exp(-np.logaddexp(0, -scores))
    return np.array([1 - proba, proba]).T

In [20]:
def selectBayes():
  """
  Select the next movie to show by Thompson sampling.

  While the user has not given at least one like and one dislike, an unseen
  movie is drawn at random. Otherwise an OnlineLogisticRegression is fitted
  on the rated movies and the unseen movie with the highest sampled
  probability of like is selected.

  Returns:
    The index label of the selected movie in `train_movies`.

  Raises:
    ValueError: if every movie of `train_movies` has already been rated.
  """
  y = train_movies.like
  rated = y.notna()

  if rated.all():
    raise ValueError('no unseen movie left to select')

  # both a like and a dislike are needed to fit the model
  if len(np.unique(y[rated].values)) != 2:
    return selectRandom()

  X_rated = X_train[rated].values
  X_unseen = X_train[~rated]

  # the loss of the model expects labels in {-1, +1}, not booleans
  labels = np.where(y[rated].values.astype(bool), 1, -1)

  # Init the model
  model = OnlineLogisticRegression(n_dim=X_train.shape[1])

  # Fit to data
  model.fit(X_rated, labels)

  # Make prediction
  probs = model.predict_proba(X_unseen.values)[:,1]

  # Choose the best movie: np.argmax is a position inside the unseen subset,
  # convert it to the row label used with train_movies.loc[...]
  return X_unseen.index[np.argmax(probs)]

In [23]:
# seed Python's RNG used by selectRandom
# NOTE(review): NumPy's RNG, used by OnlineLogisticRegression to sample weights, is not seeded here
random.seed(84)

# add or reset the column "like", which records the user's actions (None = not rated yet)
train_movies = train_movies.assign(like=None)
test_movies = test_movies.assign(like=None)

left_button = createButton('Dislike', 'danger')  # Dislike button
right_button = createButton('Like', 'success')  # Like button

# first movie shown; `idx_current` and `img` are the globals used by updateLikes / updatePoster
idx_current = selectRandom()
img = getPoster(idx_current, train_movies)  # movie poster

# set the on_click function to the button
def updateMovie(button):
  """Store the answer for the current movie, then show the movie chosen by selectBayes."""
  updateLikes(button)
  current_row = selectBayes()
  updatePoster(current_row)

left_button.on_click(updateMovie)
right_button.on_click(updateMovie)

# survey widget: Dislike button | poster | Like button
AppLayout(
    left_sidebar=left_button,
    center=img,
    right_sidebar=right_button
)

AppLayout(children=(Button(button_style='danger', description='Dislike', layout=Layout(grid_area='left-sidebar…

In [24]:
# keep only the movies rated during the survey
viewed_movies = train_movies[~train_movies['like'].isna()]

print('movie like:')
# posters of the liked movies
plotGrid(viewed_movies[viewed_movies.like==True], n_col=5)

movie like:


GridspecLayout(children=(Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xf…

In [25]:
print('movie dislike:')
# posters of the disliked movies
plotGrid(viewed_movies[viewed_movies.like==False], n_col=5)

movie dislike:


GridspecLayout(children=(Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xf…

## Ranking Test Movies

In [26]:
# labels collected during the survey (None for the movies not rated)
y_train = train_movies.like

In [27]:
# remove any previous "prediction" column in place (giveRecommendation also does this itself)
test_movies.drop(columns=['prediction'], errors="ignore", inplace=True)

In [29]:
# NOTE(review): the final ranking is produced by giveRecommendation, i.e. a plain
# LogisticRegression fitted on the survey answers, not by OnlineLogisticRegression
ranked_movies = giveRecommendation(X_train, X_test, y_train)
# print("Loss :", testMovies(ranked_movies))

In [30]:
# posters of the test movies, highest prediction first
plotGrid(ranked_movies, n_col=5)

GridspecLayout(children=(Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xf…