In [2]:
# Install libraries
!pip install surprise

Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Collecting scikit-surprise
  Downloading scikit-surprise-1.1.1.tar.gz (11.8 MB)
[K     |████████████████████████████████| 11.8 MB 10.3 MB/s 
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.1-cp37-cp37m-linux_x86_64.whl size=1619457 sha256=0612c3915d32b4536a8abd0dac0ff7ad1a5a7e9793440f50f787b42c73c7b750
  Stored in directory: /root/.cache/pip/wheels/76/44/74/b498c42be47b2406bd27994e16c5188e337c657025ab400c1c
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully installed scikit-surprise-1.1.1 surprise-0.1


In [3]:
import os
import pandas as pd
from datetime import datetime

from surprise import SVD
from surprise.model_selection import GridSearchCV as SurpriseGridSearchCV
from surprise.dump import dump as surpriseDump
from surprise.dump import load as surpriseLoad
from surprise import BaselineOnly, KNNBaseline, SVD, SVDpp
from surprise import Reader, Dataset

In [4]:
# Locations of the train and test feature CSV files
trainFeaturePath = (
    "/content/drive/MyDrive/Netflix Movie recommendation/data/updated_train_data_frame.csv"
)
testFeaturePath = (
    "/content/drive/MyDrive/Netflix Movie recommendation/data/test_data_frame.csv"
)

In [5]:
# Load the training features. The file is read with header=None, so its first
# line is treated as data and the column names are supplied explicitly.
trainData = pd.read_csv(
    trainFeaturePath,
    header=None,
    names=[
        'user', 'movie', 'globalAvg',
        'sur1', 'sur2', 'sur3', 'sur4', 'sur5',
        'smr1', 'smr2', 'smr3', 'smr4', 'smr5',
        'userAvg', 'movieAvg', 'rating',
    ],
)

In [6]:
# Wrap the (user, movie, rating) columns in a Surprise Dataset, declaring the
# rating scale as 1 to 5.
reader = Reader(rating_scale=(1, 5))
trainDataSurprise = Dataset.load_from_df(
    df=trainData[["user", "movie", "rating"]],
    reader=reader,
)

### SVD Fine Tuning

In [None]:
# Target file for the pickled, tuned SVD model
bestSVDPath = (
    "/content/drive/MyDrive/Netflix Movie recommendation/data/best_SVD.pickle"
)

In [None]:
# Search space for SVD: epoch counts 1, 11, ..., 91 crossed with six learning rates
paramGrid = {
    'n_epochs': [epochCount for epochCount in range(1, 100, 10)],
    'lr_all': [0.001, 0.002, 0.005, 0.01, 0.1, 0.05],
}

In [None]:
# Set up a 3-fold cross-validated grid search over paramGrid for SVD, scored by RMSE
gs = SurpriseGridSearchCV(
    algo_class=SVD,
    param_grid=paramGrid,
    measures=['rmse'],
    cv=3,
)

In [None]:
# Run the grid search: every parameter combination is cross-validated on the train set
gs.fit(data=trainDataSurprise)

In [None]:
# Show the best RMSE first, then (on its own line) the parameter
# combination that achieved it.
print(gs.best_score['rmse'], gs.best_params['rmse'], sep="\n")

In [None]:
# Retrieve the SVD instance configured with the best-RMSE parameters found by
# the grid search. This name was never assigned before, so the dump call below
# failed with NameError.
# NOTE(review): the grid search is created without `refit`, and per the Surprise
# docs the estimator is then not fitted on the full train set — confirm that
# whoever loads this pickle fits the model before predicting.
bestSVD = gs.best_estimator['rmse']

# Persist the model only when no pickle exists yet at the target path
if not os.path.exists(bestSVDPath):
  surpriseDump(bestSVDPath, algo=bestSVD, verbose=1)
  print("Successfully written SVD to pickle")

### SVDpp Fine Tuning

In [None]:
# Target file for the pickled, tuned SVDpp model
bestSVDppPath = (
    "/content/drive/MyDrive/Netflix Movie recommendation/data/best_SVD_pp.pickle"
)

In [None]:
# Search space for SVDpp: epoch counts 1, 11, ..., 91 crossed with six learning rates
paramGrid = {
    'n_epochs': [*range(1, 100, 10)],
    'lr_all': [0.001, 0.002, 0.005, 0.01, 0.1, 0.05],
}

In [None]:
# Set up a 3-fold cross-validated grid search over paramGrid for SVDpp, scored by RMSE
gsSVDpp = SurpriseGridSearchCV(
    algo_class=SVDpp,
    param_grid=paramGrid,
    measures=['rmse'],
    cv=3,
)

In [None]:
# Run the SVDpp grid search: every parameter combination is cross-validated on the train set
gsSVDpp.fit(data=trainDataSurprise)

In [None]:
# Show the best SVDpp RMSE first, then (on its own line) the parameter
# combination that achieved it.
print(gsSVDpp.best_score['rmse'], gsSVDpp.best_params['rmse'], sep="\n")

0.9567775039536627
{'n_epochs': 41, 'lr_all': 0.002}


In [None]:
# SVDpp instance configured with the parameters that gave the best RMSE
bestSVDpp = gsSVDpp.best_estimator["rmse"]

In [None]:
# Write the SVDpp model to disk unless a pickle is already present at that path
if not os.path.exists(bestSVDppPath):
    surpriseDump(file_name=bestSVDppPath, algo=bestSVDpp, verbose=1)
    print("Successfully written SVDpp to pickle")

The dump has been saved as file /content/drive/MyDrive/Netflix Movie recommendation/data/best_SVD_pp.pickle
Successfully written SVDpp to pickle


### Surprise KNN User Fine Tuning

In [7]:
# Target file for the pickled, tuned user-based KNN model
bestKnnUserPath = (
    "/content/drive/MyDrive/Netflix Movie recommendation/data/best_KNN_user.pickle"
)

In [11]:
# Hyper-parameter grid for the user-based KNNBaseline search.
# `k` covers 30..90 in steps of 10. The recorded best result for this search is
# k=90, which the previous upper bound of 80 could never have produced; the
# range now matches both that result and the movie-based grid.
paramGridKnnUser = {
    # Baseline estimates are fitted with SGD
    'bsl_options': {'method': ['sgd']},
    'k': list(range(30, 100, 10)),
    'sim_options': {
        'name': ['pearson_baseline'],
        # min_support candidates: 1 and 2
        'min_support': list(range(1, 3)),
        # True -> similarities are computed between users
        'user_based': [True],
        'shrinkage': [100]
        }
  }

In [9]:
# Set up a 3-fold cross-validated grid search for user-based KNNBaseline, scored by RMSE
gsKnnUser = SurpriseGridSearchCV(
    algo_class=KNNBaseline,
    param_grid=paramGridKnnUser,
    measures=['rmse'],
    cv=3,
)

In [12]:
# Run the user-based KNN grid search and report how long it took (wall clock)
cur = datetime.now()
gsKnnUser.fit(data=trainDataSurprise)
print("Time Taken: ", datetime.now() - cur)

Estimating biases using sgd...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using sgd...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using sgd...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using sgd...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using sgd...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using sgd...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using sgd...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using sgd...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using sgd...
Computing the pearson_baseline si

In [13]:
# Show the best user-based KNN RMSE first, then (on its own line) the
# parameter combination that achieved it.
print(gsKnnUser.best_score['rmse'], gsKnnUser.best_params['rmse'], sep="\n")

0.9675892090483847
{'bsl_options': {'method': 'sgd'}, 'k': 90, 'sim_options': {'name': 'pearson_baseline', 'min_support': 1, 'user_based': True, 'shrinkage': 100}}


In [14]:
# User-based KNNBaseline instance configured with the parameters that gave the best RMSE
bestKnnUser = gsKnnUser.best_estimator["rmse"]

In [15]:
# Write the user-based KNN model to disk unless a pickle is already present at that path
if not os.path.exists(bestKnnUserPath):
    surpriseDump(file_name=bestKnnUserPath, algo=bestKnnUser, verbose=1)
    print("Successfully written KNN User to pickle")

The dump has been saved as file /content/drive/MyDrive/Netflix Movie recommendation/data/best_KNN_user.pickle
Successfully written KNN User to pickle


### Surprise KNN Movie Fine Tuning

In [16]:
# Target file for the pickled, tuned movie-based KNN model
bestKnnMoviePath = (
    "/content/drive/MyDrive/Netflix Movie recommendation/data/best_KNN_movie.pickle"
)

In [17]:
# Hyper-parameter grid for the movie-based KNNBaseline search:
# k in 30..90 (step 10), min_support in {1, 2}, SGD baselines,
# pearson_baseline similarity with shrinkage 100.
paramGridKnnMovie = dict(
    bsl_options=dict(method=['sgd']),
    k=[neighbours for neighbours in range(30, 100, 10)],
    sim_options=dict(
        name=['pearson_baseline'],
        min_support=[1, 2],
        user_based=[False],  # False -> similarities are computed between movies
        shrinkage=[100],
    ),
)

In [18]:
# Set up a 3-fold cross-validated grid search for movie-based KNNBaseline, scored by RMSE
gsKnnMovie = SurpriseGridSearchCV(
    algo_class=KNNBaseline,
    param_grid=paramGridKnnMovie,
    measures=['rmse'],
    cv=3,
)

In [19]:
# Run the movie-based KNN grid search and report how long it took (wall clock)
cur = datetime.now()
gsKnnMovie.fit(data=trainDataSurprise)
print("Time Taken: ", datetime.now() - cur)

Estimating biases using sgd...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using sgd...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using sgd...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using sgd...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using sgd...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using sgd...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using sgd...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using sgd...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using sgd...
Computing the pearson_baseline si

In [20]:
# Show the best movie-based KNN RMSE first, then (on its own line) the
# parameter combination that achieved it.
print(gsKnnMovie.best_score['rmse'], gsKnnMovie.best_params['rmse'], sep="\n")

1.0665640999837576
{'bsl_options': {'method': 'sgd'}, 'k': 30, 'sim_options': {'name': 'pearson_baseline', 'min_support': 1, 'user_based': False, 'shrinkage': 100}}


In [21]:
# Movie-based KNNBaseline instance configured with the parameters that gave the best RMSE
bestKnnMovie = gsKnnMovie.best_estimator["rmse"]

In [22]:
# Write the movie-based KNN model to disk unless a pickle is already present at that path
if not os.path.exists(bestKnnMoviePath):
    surpriseDump(file_name=bestKnnMoviePath, algo=bestKnnMovie, verbose=1)
    print("Successfully written KNN Movie to pickle")

The dump has been saved as file /content/drive/MyDrive/Netflix Movie recommendation/data/best_KNN_movie.pickle
Successfully written KNN Movie to pickle
