# Installing the Surprise Library
Documentation: https://surprise.readthedocs.io/en/stable/getting_started.html

In [4]:
!pip install scikit-surprise



# Importing Libraries

In [30]:
import pandas as pd
from surprise import SVD
from surprise import Dataset
from surprise.model_selection import train_test_split

# MovieLens Dataset

In [19]:
# Fetch the built-in MovieLens 100k ratings; Surprise downloads the files
# first if they are not available yet.
data = Dataset.load_builtin(name='ml-100k')

In [27]:
# Tabulate the raw rating records as a DataFrame, naming the four fields
# of each record.
rr = pd.DataFrame(
    data.raw_ratings,
    columns=['userId', 'movieId', 'rating', 'timestamp'],
)

In [29]:
# Preview the first rows of the ratings table as a sanity check on the columns.
rr.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,196,242,3.0,881250949
1,186,302,3.0,891717742
2,22,377,1.0,878887116
3,244,51,2.0,880606923
4,166,346,1.0,886397596


# Load the DataFrame into a Surprise Dataset

In [31]:
from surprise import Reader

In [56]:
# Ratings are declared to lie on a 1-to-5 scale. Only the userId, movieId and
# rating columns are handed to Surprise; the timestamp column is left out.
reader = Reader(rating_scale=(1, 5))
data_rr = Dataset.load_from_df(
    rr.loc[:, ['userId', 'movieId', 'rating']],
    reader=reader,
)

# Train Test Split

In [57]:
from surprise.model_selection import train_test_split

# Hold out 20% of the ratings for testing (the library default, spelled out
# here). A fixed random_state makes the shuffle, and therefore the split,
# identical on every run of the notebook.
trainset, testset = train_test_split(data_rr, test_size=0.2, random_state=42)

# SVD

In [58]:
from surprise import SVD

# SVD initialises its factors randomly; seeding it makes repeated runs of
# this cell train the same model on the same trainset.
svd_model = SVD(random_state=42)
svd_model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7fcdd6a52280>

In [62]:
# Run the fitted model over the held-out testset; the resulting predictions
# are what the accuracy metrics are computed from.
pred = svd_model.test(testset)

# RMSE Checking

In [67]:
from surprise import accuracy

# RMSE of the test-set predictions. The call prints the score and also
# returns it, so the value appears as the cell result as well.
accuracy.rmse(pred, verbose=True)

RMSE: 0.9371


0.937096014853841

# Cross Validate

In [72]:
from surprise.model_selection import cross_validate

# Run 5-fold cross-validation and print results.
# The original call passed `algo`, a name that is never defined in this
# notebook (NameError on a fresh kernel). A new, unfitted SVD() is used
# instead so that the folds do not refit the already-trained `svd_model`.
cross_validate(SVD(), data_rr, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9415  0.9319  0.9369  0.9292  0.9412  0.9361  0.0049  
MAE (testset)     0.7433  0.7346  0.7406  0.7313  0.7411  0.7382  0.0045  
Fit time          4.05    4.28    4.36    4.14    4.43    4.25    0.14    
Test time         0.39    0.12    0.12    0.12    0.12    0.17    0.11    


{'test_rmse': array([0.94146211, 0.93191022, 0.93688012, 0.92919319, 0.94120277]),
 'test_mae': array([0.74329685, 0.73457661, 0.74061033, 0.73126576, 0.7410701 ]),
 'fit_time': (4.051919221878052,
  4.275623321533203,
  4.355277061462402,
  4.14227819442749,
  4.432772874832153),
 'test_time': (0.3879690170288086,
  0.12125802040100098,
  0.12038612365722656,
  0.11662578582763672,
  0.12415289878845215)}

In [71]:
# Inspect the `pu` attribute of the fitted model (the learned user factors,
# according to Surprise's SVD documentation); numpy abbreviates the display.
svd_model.pu

array([[ 0.08280576,  0.2097128 , -0.01862517, ...,  0.04910215,
        -0.07803353, -0.20798585],
       [ 0.00567627, -0.02955097, -0.04768467, ..., -0.12035405,
        -0.16166514, -0.06524532],
       [-0.18833084,  0.09318954, -0.06668481, ..., -0.01526016,
         0.02254716, -0.07064192],
       ...,
       [-0.00788184,  0.02541666, -0.01206611, ..., -0.01184251,
        -0.12202866,  0.00717855],
       [ 0.02601913,  0.12180529,  0.10657666, ..., -0.16658154,
         0.08865719,  0.03233118],
       [ 0.16365561, -0.00634451, -0.00432577, ..., -0.16716541,
        -0.08428629, -0.15935323]])

# Grid Search

In [83]:
# Candidate values for SVD's n_factors and n_epochs hyper-parameters
# (2 x 2 = 4 combinations in total).
params = dict(n_factors=[10, 100], n_epochs=[10, 20])

In [84]:
from surprise.model_selection import GridSearchCV

# 5-fold grid search over `params`. The SVD class itself (not an instance)
# is passed, so the search can build a model for each parameter combination.
grid = GridSearchCV(algo_class=SVD, param_grid=params, cv=5)
grid.fit(data_rr)

In [85]:
# Best score found by the grid search, reported per accuracy measure
# (a dict keyed by 'rmse' and 'mae').
grid.best_score

{'rmse': 0.9367876669658447, 'mae': 0.7378570551185001}