<a href="https://colab.research.google.com/github/victorsouza89/NN_recom_tests/blob/main/NN_recom.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import os
import warnings
from matplotlib import pyplot as plt

from keras.models import load_model
from sklearn.model_selection import train_test_split
from keras.layers import Input, Embedding, Flatten, Dot, Dense, Concatenate
from keras.models import Model
warnings.filterwarnings('ignore')
%matplotlib inline

## Data

In [2]:
url="http://deepyeti.ucsd.edu/jianmo/amazon/categoryFilesSmall/Movies_and_TV.csv"
dataset=pd.read_csv(url, header=None)
dataset.columns = ['item','user','rating','timestamp']
dataset = dataset.drop(columns=['timestamp'])
dataset['user_id'] = dataset.groupby(['user']).ngroup()
dataset['item_id'] = dataset.groupby(['item']).ngroup()
dataset = dataset.sample(frac=1).reset_index(drop=True)
dataset

Unnamed: 0,item,user,rating,user_id,item_id
0,B009474UW4,A23OYFPIL24JZ1,5.0,1115590,142109
1,B0000E2PVR,A2QXPRQOE1N6Y4,5.0,1768353,37113
2,B0002MJT0I,A3GMELZE8MZO0A,4.0,2488892,42231
3,B0069556YU,A26SX8K6X69QA8,5.0,1203101,134670
4,B00NAQ3EOK,A1NDOFET819W3D,5.0,657543,166326
...,...,...,...,...,...
8765563,B00C6B8LGQ,ASTNEAG86CDY8,5.0,3624840,149423
8765564,630560097X,A2K4NY3LIST3EU,5.0,1577214,18138
8765565,B00AF6B22E,A3FTZVEPKZMODV,4.0,2466615,144796
8765566,B000WXDMLE,A38BOJIXV786SP,5.0,2256662,81904


## Define NN

In [3]:
train, test = train_test_split(dataset, test_size=0.8)

In [4]:
# creating item embedding path
item_input = Input(shape=[1], name="Items-Input")
item_embedding = Embedding(len(train.item)+1, 5, name="Items-Embedding")(item_input)
item_vec = Flatten(name="Flatten-Items")(item_embedding)
# creating user embedding path
user_input = Input(shape=[1], name="User-Input")
user_embedding = Embedding(len(train.user)+1, 5, name="User-Embedding")(user_input)
user_vec = Flatten(name="Flatten-Users")(user_embedding)
# concatenate features
conc = Concatenate()([item_vec, user_vec])
# add fully-connected-layers
fc1 = Dense(32, activation='relu')(conc)
#fc2 = Dense(32, activation='relu')(fc1)
#fc3 = Dense(32, activation='relu')(fc2)
out = Dense(1)(fc1)
# Create model and compile it
model = Model([item_input, user_input], out)
model.compile('adam', 'mean_squared_error')
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 Items-Input (InputLayer)       [(None, 1)]          0           []                               
                                                                                                  
 User-Input (InputLayer)        [(None, 1)]          0           []                               
                                                                                                  
 Items-Embedding (Embedding)    (None, 1, 5)         8765570     ['Items-Input[0][0]']            
                                                                                                  
 User-Embedding (Embedding)     (None, 1, 5)         8765570     ['User-Input[0][0]']             
                                                                                              

## Train NN

In [5]:
history = model.fit([train.item_id, train.user_id], train.rating, validation_split = 0.1, epochs=1, verbose=1)



In [None]:
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()

KeyError: ignored

## Predictions NN

In [33]:
k = 100000
predictions = model.predict([test.item_id.head(k), test.user_id.head(k)]).T

In [34]:
pred_round = np.round(predictions)
real_ratings = test.rating.head(k).values
errors = pred_round - real_ratings

In [35]:
np.sqrt(np.sum((errors)**2)/k)

1.1691236033884527

In [46]:
[print(f'{predictions[0][i]:.3f} -> {pred_round[0][i]:.1f} | {test.rating.iloc[i]} : err={errors[0][i]:.1f}') for i in range(100)]

4.801 -> 5.0 | 5.0 : err=0.0
3.965 -> 4.0 | 5.0 : err=-1.0
4.189 -> 4.0 | 5.0 : err=-1.0
4.300 -> 4.0 | 4.0 : err=0.0
4.092 -> 4.0 | 2.0 : err=2.0
4.345 -> 4.0 | 4.0 : err=0.0
2.240 -> 2.0 | 1.0 : err=1.0
3.891 -> 4.0 | 5.0 : err=-1.0
3.543 -> 4.0 | 1.0 : err=3.0
4.306 -> 4.0 | 5.0 : err=-1.0
4.366 -> 4.0 | 5.0 : err=-1.0
4.463 -> 4.0 | 5.0 : err=-1.0
4.377 -> 4.0 | 5.0 : err=-1.0
4.361 -> 4.0 | 4.0 : err=0.0
4.523 -> 5.0 | 5.0 : err=0.0
3.944 -> 4.0 | 5.0 : err=-1.0
4.396 -> 4.0 | 5.0 : err=-1.0
4.087 -> 4.0 | 2.0 : err=2.0
4.635 -> 5.0 | 4.0 : err=1.0
4.554 -> 5.0 | 5.0 : err=0.0
4.851 -> 5.0 | 5.0 : err=0.0
4.410 -> 4.0 | 5.0 : err=-1.0
4.656 -> 5.0 | 5.0 : err=0.0
4.341 -> 4.0 | 5.0 : err=-1.0
4.389 -> 4.0 | 5.0 : err=-1.0
4.371 -> 4.0 | 5.0 : err=-1.0
4.410 -> 4.0 | 5.0 : err=-1.0
4.614 -> 5.0 | 5.0 : err=0.0
4.039 -> 4.0 | 4.0 : err=0.0
4.236 -> 4.0 | 5.0 : err=-1.0
4.184 -> 4.0 | 5.0 : err=-1.0
4.059 -> 4.0 | 5.0 : err=-1.0
4.416 -> 4.0 | 3.0 : err=1.0
4.615 -> 5.0 | 5.0 : err=0

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]