In [275]:
import pandas as pd
import numpy as np
import keras
import tensorflow as tf
from sklearn.model_selection import train_test_split
from keras.layers import Input, Embedding, Flatten, Dot, Dense, Concatenate
from keras.optimizers import Adam
from keras.utils.vis_utils import model_to_dot
from sklearn.metrics import mean_absolute_error

In [303]:
review_df = pd.read_csv('final_df')
review_df.drop('Unnamed: 0', axis=1, inplace=True)
beer_df = pd.read_csv('final_beers')
beer_df.drop('Unnamed: 0', axis=1, inplace=True)

In [304]:
df = review_df[['reviewer_id', 'beer_id', 'overall_rating']]

In [305]:
df.head()

Unnamed: 0,reviewer_id,beer_id,overall_rating
0,0,0,3.89
1,1,0,4.79
2,2,0,4.89
3,3,0,4.7
4,4,0,4.5


In [306]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 140834 entries, 0 to 140833
Data columns (total 3 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   reviewer_id     140834 non-null  int64  
 1   beer_id         140834 non-null  int64  
 2   overall_rating  140834 non-null  float64
dtypes: float64(1), int64(2)
memory usage: 3.2 MB


In [307]:
num_reviewers = len(df['reviewer_id'].unique())
num_beers = len(df['beer_id'].unique())
n_latent_factors = 3

In [308]:
def create_network(rater, beer, rating):
    
    #input beer and rate
    #embedding both beer and rater
    beer_input=Input(shape=[1],name='Beer')
    beer_embedding=Embedding(num_beers + 1, n_latent_factors, name='Beer-Embedding')(beer_input)
    beer_vec = Flatten(name='FlattenBeer')(beer_embedding)
    #add a drop-off layer to prevent overfitting
    beer_vec = keras.layers.Dropout(0.2)(beer_vec)
    
    rater_input=Input(shape=(1,),name='Rater')
    rater_embedding=Embedding(num_reviewers + 1, n_latent_factors, name='Rater-Embedding')(rater_input)
    rater_vec=Flatten(name='FlattenRater')(rater_embedding)
    rater_vec = keras.layers.Dropout(0.2)(rater_vec)
    
    #concat beer vec and rater vector to put in neural_network
    prod = Dot(name="Dot-Product", axes=1)([beer_vec, rater_vec])
    prod = keras.layers.Dropout(0.2)(prod)
    
    #3 hiddden laysers
    dense = keras.layers.Dense(100,name='FullyConnected')(prod)
    dropout_1 = keras.layers.Dropout(0.2,name='Dropout')(dense)
    dense_2 = keras.layers.Dense(50,name='FullyConnected-1')(prod)
    dropout_2 = keras.layers.Dropout(0.2,name='Dropout')(dense_2)
    dense_3 = keras.layers.Dense(25,name='FullyConnected-2', activation='relu')(dense_2)
    
    #output layer
    result = keras.layers.Dense(1, activation='relu',name='Activation')(dense_3)
    
    adam = Adam(lr=0.005)
    model = keras.Model([rater_input, beer_input], result)
    model.compile(optimizer=adam,loss= 'mean_absolute_error')
    
    model.fit(x=[rater, beer], y=rating, batch_size=20, epochs=20, verbose=0)
    
    return model

In [353]:
train, test = train_test_split(df, test_size=0.2, random_state=45)

In [354]:
model=create_network(train['reviewer_id'], train['beer_id'], train['overall_rating'])

In [355]:
y_hat=model.predict([test['reviewer_id'], test['beer_id']])

In [356]:
y_true=test['overall_rating']

In [357]:
print(mean_absolute_error(y_true, y_hat))

0.33581334502119625


In [315]:
df_ratings

Unnamed: 0,reviewer_id,beer_id,overall_rating
25,25,1,3.48
105635,25,7333,3.34
29038,25,2190,3.29
2378,25,160,3.21
55841,25,3873,3.19
70766,25,5050,3.19
67737,25,4815,3.18
64021,25,4524,3.02
22523,25,1665,2.77
271,25,11,1.8


In [437]:
numreview = 3
numrec = 3
def recommendation(reviewer_id):
    reviewed = df[df['reviewer_id']==reviewer_id].sort_values(by='overall_rating', ascending=False)
    #beers that the reviewer has tried and the ratings they gave them [reviewer_id, beer_id, overall_rating]
    reviewer=reviewed['reviewer_id'].values[0]
    #the id of the reviewer
    toprec = reviewed['beer_id'].values[:numreview]
    #the id of the top rated beers of the reviewer
    
    #Beers reviewed by user
    print('Top beers reviewed by user ID: {}'.format(reviewer))
    for recs in toprec:
        rec_id = beer_df.loc[beer_df['beer_id']==int('{}'.format(recs)), ['beer_name', 'beer_style', 'beer_substyle']]
        print(rec_id['beer_name'].values)
    user_pred = pd.Series(reviewer for i in range(df['beer_id'].nunique()))
    beer_pred = pd.Series(df['beer_id'].unique())
    
    predictions=model.predict([user_pred, beer_pred])
    predictions = np.array([a[0] for a in predictions])
    
    recommended_beer_ids = (-predictions).argsort()[:numrec]
    
    #beers recommended to user
    print('Beers recommended for user ID: {}'.format(reviewer))
    for id_beer in recommended_beer_ids:        
        tmp_id = beer_df.loc[beer_df['beer_id']==int('{}'.format(id_beer)), ['beer_name', 'beer_style', 'beer_substyle']]
        print(tmp_id['beer_name'].values)
    #return rec_id, tmp_id

In [438]:
recommendation(0)

Top beers reviewed by user ID: 0
['Trappistes Rochefort 10']
['Trappist Westvleteren 12 (XII)']
['Westmalle Trappist Tripel']
Beers recommended for user ID: 0
['Camo 900 High Gravity Lager']
['Camo High Gravity Lager']
['Pit Bull']


In [386]:
recommendation(55)

Top beers reviewed by user ID: 55
['Oatmeal Raisin Cookie Brown Ale']
['Old Stock Cellar Reserve (Aged In Bourbon Barrels)']
['Coolship Red']
Beers recommended for user ID: 55
['Four O Street Legal Malt Liquor']
['Bourbon County Brand Anniversary Stout']
['Loerik']


In [388]:
recommendation(8001)

Top beers reviewed by user ID: 8001
['As You Wish...']
['The Revival - Barrel-Aged']
['War Bird']
Beers recommended for user ID: 8001
['Pit Bull']
['Beer:Barrel:Time (2020)']
['Lou Pepe - Kriek']


In [429]:
num_recs = {}
num_revs = {}

for i in range(num_reviewers):
    top_rev, top_rec = recommendation(i)
    for rev in top_rev['beer_style']:
        if rev in num_revs:
            num_revs[rev] +=1
        else:
            num_revs[rev] = 1
    for rec in top_rec['beer_style']:
        if rec in num_recs:
            num_recs[rec] +=1
        else:
            num_recs[rec] = 1

In [430]:
num_recs

{'Pale Lagers': 11832,
 'Wild/Sour Beers': 533,
 'Specialty Beers': 1767,
 'India Pale Ales': 343,
 'Stouts': 694}

In [431]:
num_revs

{'Strong Ales': 2007,
 'Brown Ales': 696,
 'Bocks': 611,
 'Porters': 855,
 'Specialty Beers': 1202,
 'Stouts': 1717,
 'Wild/Sour Beers': 780,
 'Pale Lagers': 1556,
 'Dark Lagers': 824,
 'India Pale Ales': 1529,
 'Wheat Beers': 805,
 'Pale Ales': 1817,
 'Hybrid Beers': 266,
 'Dark Ales': 504}