## Source
 - https://towardsdatascience.com/building-a-book-recommendation-system-using-keras-1fba34180699
 
## Data
 - https://www.kaggle.com/zygmunt/goodbooks-10k

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import warnings

from keras.layers import Input, Embedding, Flatten, Dot, Dense, Concatenate
from keras.models import Model

warnings.filterwarnings('ignore')
%matplotlib inline

Using TensorFlow backend.


In [2]:
dataset = pd.read_csv('ratings.csv')

In [3]:
dataset.head()

Unnamed: 0,book_id,user_id,rating
0,1,314,5
1,1,439,3
2,1,588,5
3,1,1169,4
4,1,1185,4


In [4]:
dataset.shape

(981756, 3)

In [5]:
from sklearn.model_selection import train_test_split
train, test = train_test_split(dataset, test_size=0.2, random_state=42)

In [6]:
train.head()

Unnamed: 0,book_id,user_id,rating
341848,3423,4608,2
964349,9811,36373,5
645459,6485,2957,4
74960,750,42400,3
358670,3591,36886,5


In [7]:
n_users = len(dataset.user_id.unique())
n_users

53424

In [8]:
n_books = len(dataset.book_id.unique())
n_books

10000

## Recommender Model

> Embeddings are a method of mapping from discrete objects, such as words to vectors of continuous values.

In [9]:
# creating book embedding path
book_input = Input(shape=[1], name="Book-Input")
book_embedding = Embedding(n_books+1, 5, name="Book-Embedding")(book_input)
book_vec = Flatten(name="Flatten-Books")(book_embedding)

# creating user embedding path
user_input = Input(shape=[1], name="User-Input")
user_embedding = Embedding(n_users+1, 5, name="User-Embedding")(user_input)
user_vec = Flatten(name="Flatten-Users")(user_embedding)

# performing dot product and creating model
prod = Dot(name="Dot-Product", axes=1)([book_vec, user_vec])
model = Model([user_input, book_input], prod)
model.compile('adam', 'mean_squared_error')

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [10]:
from keras.models import load_model

if os.path.exists('regression_model.h5'):
    model = load_model('regression_model.h5')
else:
    history = model.fit([train.user_id, train.book_id], train.rating, epochs=5, verbose=1)
    model.save('regression_model.h5')
    plt.plot(history.history['loss'])
    plt.xlabel("Epochs")
    plt.ylabel("Training Error")

In [11]:
model.evaluate([test.user_id, test.book_id], test.rating)



1.175222886558695

In [18]:
predictions = model.predict([test.user_id.head(10), test.book_id.head(10)])

In [15]:
predictions

array([[4.7777977],
       [3.9624794],
       [4.0087776],
       [4.3738775],
       [3.2653754],
       [3.6973896],
       [3.6310987],
       [5.404256 ],
       [4.3073125],
       [4.013275 ]], dtype=float32)

In [19]:
test.head(10)

Unnamed: 0,book_id,user_id,rating
646451,6495,19643,5
614851,6175,8563,4
974393,9920,52110,3
21471,215,33864,5
272540,2728,16587,3
235240,2354,52668,3
874406,8848,38264,3
414606,4152,52864,4
283254,2835,36535,3
432283,4330,36242,5


## MAke recomendations for user 123

In [21]:
# Creating dataset for making recommendations for the first user
book_data = np.array(list(set(dataset.book_id)))
book_data[:5]

array([1, 2, 3, 4, 5])

In [22]:
user = np.array([123 for i in range(len(book_data))])
user[:5]

array([123, 123, 123, 123, 123])

In [31]:
predictions = model.predict([user, book_data])
predictions

array([[3.60798  ],
       [3.4119132],
       [2.2937367],
       ...,
       [4.0478573],
       [3.7694402],
       [4.0238466]], dtype=float32)

In [32]:
predictions = predictions.reshape(-1)

In [33]:
recommended_book_ids = predictions.argsort()

In [34]:
predictions[recommended_book_ids]

array([-6.007576 , -5.851398 , -5.7418838, ...,  5.2901907,  5.3490267,
        5.6183195], dtype=float32)

In [35]:
recommended_book_ids

array([8615, 9569, 7822, ..., 9346, 6585, 8258])

In [36]:
recommended_book_ids[::-1]

array([8258, 6585, 9346, ..., 7822, 9569, 8615])

In [37]:
predictions[recommended_book_ids[::-1]]

array([ 5.6183195,  5.3490267,  5.2901907, ..., -5.7418838, -5.851398 ,
       -6.007576 ], dtype=float32)