# **SVD Model**

## **Installing Surprise**

In [1]:
!pip install scikit-surprise

Collecting scikit-surprise
[?25l  Downloading https://files.pythonhosted.org/packages/f5/da/b5700d96495fb4f092be497f02492768a3d96a3f4fa2ae7dea46d4081cfa/scikit-surprise-1.1.0.tar.gz (6.4MB)
[K     |████████████████████████████████| 6.5MB 4.3MB/s 
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.0-cp36-cp36m-linux_x86_64.whl size=1675336 sha256=cf8e50f3436288029578d39667edcc4541e63f95f677153c43343a3757f5a227
  Stored in directory: /root/.cache/pip/wheels/cc/fa/8c/16c93fccce688ae1bde7d979ff102f7bee980d9cfeb8641bcf
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.0


## **Importing libraries**

In [0]:
import pandas as pd
from surprise import Dataset, Reader
from surprise import dump
from surprise import SVD
from surprise.model_selection import train_test_split

## **Preparing data**

In [0]:
# Rating scale understood by Surprise: integer stars from 1 to 5.
reader = Reader(rating_scale=(1, 5))

# Raw inputs, read from the current working directory.
books = pd.read_csv('books.csv')
ratings = pd.read_csv('ratings.csv')

# NOTE(review): Surprise reads the three columns positionally as
# (user id, item id, rating). Here 'book_id' comes first, so internally the
# books take the "user" role and the users take the "item" role. The rest of
# the notebook labels the prediction columns in this same order, so it stays
# self-consistent -- but anyone reusing the saved model must call
# predict(uid=<book_id>, iid=<user_id>).
data = Dataset.load_from_df(
    df=ratings[['book_id', 'user_id', 'rating']],
    reader=reader,
)

## **Training**

In [0]:
# One seed for both the train/test split and the SVD factor initialisation, so
# that Restart & Run All reproduces the same split, model and predictions.
SEED = 42

# Hold out 20% of the ratings for evaluation.
train_set, test_set = train_test_split(data, test_size=0.2, random_state=SEED)

# SVD matrix factorisation: 20 SGD epochs, learning rate 0.005 and
# regularisation 0.2 applied to all parameters.
model = SVD(n_epochs=20, lr_all=0.005, reg_all=0.2, random_state=SEED)
model.fit(train_set)

# List of Surprise Prediction tuples, one per held-out rating.
predictions = model.test(test_set)

## **Saving the model**

In [5]:
# Persist the fitted algorithm with Surprise's own dump helper (pickle-based).
# Only the model is stored; the test-set predictions are not written.
model_file = "./model.pickle"
dump.dump(file_name=model_file, algo=model)
print('model saved')

model saved


## **Preparing the dataset with predictions**

In [6]:
# Turn the list of Surprise predictions into a flat table.
# Each prediction is a (uid, iid, r_ui, est, details) tuple; because the
# dataset was loaded with 'book_id' in the first (uid) position, the first
# field is the book id and the second is the user id.
pred_ds = (
    pd.DataFrame(
        predictions,
        columns=['book_id', 'user_id', 'actual_rating', 'pred_rating', 'details'],
    )
    .assign(
        # Flag stored by Surprise in the per-prediction details dict.
        impossible=lambda df: df['details'].apply(lambda d: d['was_impossible']),
        # Prediction rounded to the nearest whole star.
        pred_rating_round=lambda df: df['pred_rating'].round(),
        # Absolute error of the raw (unrounded) prediction.
        abs_err=lambda df: (df['pred_rating'] - df['actual_rating']).abs(),
    )
    .drop(columns=['details'])
)
pred_ds

Unnamed: 0,book_id,user_id,actual_rating,pred_rating,impossible,pred_rating_round,abs_err
0,203,46398,5.0,3.843163,False,4.0,1.156837
1,2934,24096,4.0,3.492575,False,3.0,0.507425
2,6942,16977,5.0,4.154312,False,4.0,0.845688
3,4025,47603,3.0,4.267430,False,4.0,1.267430
4,842,20616,4.0,4.142464,False,4.0,0.142464
...,...,...,...,...,...,...,...
196347,7066,14253,4.0,3.616366,False,4.0,0.383634
196348,7583,3445,5.0,4.571810,False,5.0,0.428190
196349,1013,39237,3.0,3.228632,False,3.0,0.228632
196350,9728,5081,4.0,3.434611,False,3.0,0.565389


## **Viewing the results**

In [7]:
# User whose held-out ratings we want to inspect.
id_user = 588

# Attach book titles and (where available) the test-set prediction to every
# rating. Both joins are left joins, so ratings that were part of the training
# split keep a NaN 'pred_rating'.
# NOTE(review): in the recorded output most 'title' values are NaN, which
# suggests books['book_id'] does not share the id space of
# ratings['book_id'] -- confirm which column of books.csv is the right key.
ratings_titles = (
    ratings
    .merge(books[['book_id', 'title']], on='book_id', how='left')
    .merge(
        pred_ds[['book_id', 'user_id', 'pred_rating']],
        on=['book_id', 'user_id'],
        how='left',
    )
)

# All ratings of the chosen user ...
user_ds = ratings_titles[ratings_titles['user_id'] == id_user]

# ... restricted to those that have a prediction, best-rated first.
res_ds = (
    user_ds
    .dropna(subset=['pred_rating'])
    .sort_values('rating', ascending=False)
)

res_ds


Unnamed: 0,book_id,user_id,rating,title,pred_rating
682867,6865,588,5,,3.579615
12100,122,588,5,"The Power of One (The Power of One, #1)",3.409918
21101,212,588,5,,3.556043
822324,8305,588,4,,3.644535
20300,204,588,4,,3.70387
296718,2970,588,4,,3.666776
430993,4317,588,4,,3.185446
2601,27,588,4,Neither Here nor There: Travels in Europe,3.99344
29701,298,588,4,,3.497939
25502,256,588,4,,3.402025


In [9]:
# Spot-check the raw ratings of the inspected user for a couple of books.
# Uses `id_user` (set in the results cell above) instead of repeating the
# literal user id, so the check follows the user being inspected.
bid = [27, 4630]
for b in bid:
    # `@name` lets DataFrame.query read Python variables without string formatting.
    print(ratings.query('book_id == @b and user_id == @id_user'), '\n')

      book_id  user_id  rating
2601       27      588       4 

        book_id  user_id  rating
462023     4630      588       4 

