In [2]:
import pandas as pd
import numpy as np
from surprise import SVD, accuracy, BaselineOnly, Reader, Dataset
from surprise.model_selection import cross_validate, train_test_split
from surprise import SVDpp, NormalPredictor, BaselineOnly,KNNBaseline, KNNBasic, KNNWithMeans, KNNWithZScore, NMF, SlopeOne, CoClustering

## Beer Recommender System with Surprise

This project uses a dateset of around 1.5 million reviews sourced from BeerAdvocate to create a collaborative recommendation system. 

In [3]:
from progressbar import ProgressBar

pbar = ProgressBar()

In [4]:
df = pd.read_csv('beer_reviews_clean.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,brewery_id,brewery_name,review_time,review_overall,review_aroma,review_appearance,review_profilename,beer_style,review_palate,review_taste,beer_name,beer_abv,beer_beerid
0,0,10325,Vecchio Birraio,2009-02-16 20:57:03,1.5,2.0,2.5,stcules,Hefeweizen,1.5,1.5,Sausa Weizen,5.0,47986
1,1,10325,Vecchio Birraio,2009-03-01 13:44:57,3.0,2.5,3.0,stcules,English Strong Ale,3.0,3.0,Red Moon,6.2,48213
2,2,10325,Vecchio Birraio,2009-03-01 14:10:04,3.0,2.5,3.0,stcules,Foreign / Export Stout,3.0,3.0,Black Horse Black Beer,6.5,48215
3,3,10325,Vecchio Birraio,2009-02-15 19:12:25,3.0,3.0,3.5,stcules,German Pilsener,2.5,3.0,Sausa Pils,5.0,47969
4,4,1075,Caldera Brewing Company,2010-12-30 18:53:26,4.0,4.5,4.0,johnmichaelsen,American Double / Imperial IPA,4.0,4.5,Cauldron DIPA,7.7,64883


In [5]:
reviews = df[['review_profilename', 'beer_name', 'review_overall']]
reviews.head()

Unnamed: 0,review_profilename,beer_name,review_overall
0,stcules,Sausa Weizen,1.5
1,stcules,Red Moon,3.0
2,stcules,Black Horse Black Beer,3.0
3,stcules,Sausa Pils,3.0
4,johnmichaelsen,Cauldron DIPA,4.0


The dataset contains many beers with low ratings counts. We don't want the system to recommend a beer that only several people have enjoyed, so we will filter out all beers with lower than 50 ratings. 

In [6]:
min_beer_ratings = 50
filter_beers = df['beer_name'].value_counts() > min_beer_ratings
filter_beers = filter_beers[filter_beers].index.tolist()

min_user_ratings = 50
filter_users = df['review_profilename'].value_counts() > min_user_ratings
filter_users = filter_users[filter_users].index.tolist()

reviews_new = reviews[(reviews['beer_name'].isin(filter_beers)) & (reviews['review_profilename'].isin(filter_users))]
print('The original data frame shape:\t{}'.format(reviews.shape))
print('The new data frame shape:\t{}'.format(reviews_new.shape))

The original data frame shape:	(1496256, 3)
The new data frame shape:	(1075228, 3)


In [7]:
reader = Reader(rating_scale=(1, 5))

data = Dataset.load_from_df(reviews_new[['review_profilename', 'beer_name', 'review_overall']], reader)

In [8]:
trainset, testset = train_test_split(data, test_size=.25)

In [9]:
algo = SVD()

algo.fit(trainset)
predictions = algo.test(testset)

In [12]:
accuracy.rmse(predictions)

RMSE: 0.5758


0.5757757046576493

The RMSE on an untuned SVD algorithm is a respectable 0.57. Below we will evaluate several algorithms and see which one performs best on the data. 

In [10]:
predictions[0:5]

[Prediction(uid='Long813', iid='Hophead Double India Pale Ale', r_ui=4.0, est=3.585667317417661, details={'was_impossible': False}),
 Prediction(uid='beerwolf77', iid='Orchard White', r_ui=4.0, est=4.007106415545197, details={'was_impossible': False}),
 Prediction(uid='SShelly', iid='Green Flash Le Freak', r_ui=4.0, est=4.131120724067257, details={'was_impossible': False}),
 Prediction(uid='maxpower', iid='Lump Of Coal', r_ui=3.5, est=3.539782794389326, details={'was_impossible': False}),
 Prediction(uid='Cs1987', iid='König Pilsener', r_ui=4.0, est=3.6295640690908293, details={'was_impossible': False})]

In [15]:
benchmark = []
# Iterate over all algorithms
for algorithm in pbar([SVD(), SVDpp(), SlopeOne(), NMF(), NormalPredictor(), KNNBaseline(), KNNBasic(), KNNWithMeans(), KNNWithZScore(), BaselineOnly(), CoClustering()]):    # Perform cross validation
    results = cross_validate(algorithm, data, measures=['RMSE'], cv=3, verbose=True)
    
    # Get results & append algorithm name
    tmp = pd.DataFrame.from_dict(results).mean(axis=0)
    tmp = tmp.append(pd.Series([str(algorithm).split(' ')[0].split('.')[-1]], index=['Algorithm']))
    benchmark.append(tmp)
    
pd.DataFrame(benchmark).set_index('Algorithm').sort_values('test_rmse')    

  9% |######                                                                  |

Evaluating RMSE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.5744  0.5754  0.5757  0.5752  0.0006  
Fit time          35.32   39.30   39.71   38.11   1.98    
Test time         3.22    2.66    3.34    3.07    0.30    


 18% |#############                                                           |

Evaluating RMSE of algorithm SVDpp on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.5694  0.5697  0.5696  0.5696  0.0002  
Fit time          3517.12 3483.23 3462.88 3487.74 22.37   
Test time         121.84  122.68  121.15  121.89  0.63    


 27% |###################                                                     |

Evaluating RMSE of algorithm SlopeOne on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.5743  0.5752  0.5738  0.5744  0.0006  
Fit time          18.47   19.02   18.61   18.70   0.24    
Test time         114.39  114.19  115.05  114.54  0.37    


 36% |##########################                                              |

Evaluating RMSE of algorithm NMF on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.5834  0.5840  0.5852  0.5842  0.0008  
Fit time          41.14   42.67   42.30   42.04   0.65    
Test time         2.77    3.43    3.51    3.24    0.33    


 45% |################################                                        |

Evaluating RMSE of algorithm NormalPredictor on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9525  0.9535  0.9552  0.9537  0.0011  
Fit time          1.02    1.32    1.31    1.22    0.14    
Test time         2.99    3.47    3.47    3.31    0.23    
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.


 54% |#######################################                                 |

Evaluating RMSE of algorithm KNNBaseline on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.5739  0.5727  0.5747  0.5738  0.0008  
Fit time          11.79   12.24   12.37   12.13   0.25    
Test time         114.44  113.52  114.16  114.04  0.38    
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.


 63% |#############################################                           |

Evaluating RMSE of algorithm KNNBasic on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.5877  0.5882  0.5908  0.5889  0.0014  
Fit time          9.42    9.54    9.78    9.58    0.15    
Test time         101.09  99.33   101.01  100.47  0.81    
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.


 72% |####################################################                    |

Evaluating RMSE of algorithm KNNWithMeans on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.5871  0.5859  0.5856  0.5862  0.0006  
Fit time          9.57    9.71    9.98    9.75    0.17    
Test time         105.05  105.82  106.04  105.64  0.42    
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.


 81% |##########################################################              |

Evaluating RMSE of algorithm KNNWithZScore on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.5898  0.5899  0.5896  0.5898  0.0001  
Fit time          10.00   10.18   10.47   10.22   0.19    
Test time         110.28  110.12  109.26  109.89  0.45    
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...


 90% |#################################################################       |

Evaluating RMSE of algorithm BaselineOnly on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.5742  0.5733  0.5725  0.5733  0.0007  
Fit time          2.77    2.81    2.84    2.81    0.03    
Test time         3.63    4.31    4.33    4.09    0.33    
Evaluating RMSE of algorithm CoClustering on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.6260  0.6258  0.6239  0.6252  0.0010  
Fit time          13.78   14.10   14.15   14.01   0.16    
Test time         4.07    3.18    3.93    3.73    0.39    


100% |########################################################################|


Unnamed: 0_level_0,test_rmse,fit_time,test_time
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
SVDpp,0.569573,3487.744543,121.888504
BaselineOnly,0.573327,2.806333,4.093334
KNNBaseline,0.573759,12.133106,114.039659
SlopeOne,0.574417,18.701889,114.540148
SVD,0.575154,38.108621,3.073264
NMF,0.5842,42.037437,3.236865
KNNWithMeans,0.586201,9.753362,105.637112
KNNBasic,0.588927,9.580004,100.474461
KNNWithZScore,0.589756,10.216587,109.887675
CoClustering,0.625232,14.011497,3.729301


SVDpp outperformed the other algorithms by .01% RMSE. We will try to tune the parameters further for more accuracy. 

In [16]:
from surprise.model_selection.search import GridSearchCV

In [17]:
param_grid = {'n_factors': [50,100,150],
              'n_epochs': [20,30], 
              'lr_all': [0.005,0.01],
              'reg_all':[0.02,0.1]}

In [26]:
grid_search = GridSearchCV(algo_class=SVDpp, param_grid=param_grid, cv=3, joblib_verbose=2)

In [29]:
grid_search.fit(data)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  2.7min remaining:    0.0s


KeyboardInterrupt: 

After letting the grid search run through an entire weekend, I decided the potential accuracy gain wasn't worth the sheer amount of time it took to fit. In the future, it would be worth revisiting this grid search with more computing power. In the meantime, we'll try manually tuning the model. 

In [33]:
svd_params = {'n_factors': 150, 'n_epochs': 75, 'lr_all': 0.01, 'reg_all': 0.1}

svd = SVDpp(n_factors=150, n_epochs=75, lr_all=0.01, reg_all=0.1)

cross_validate(svd, data, measures=['RMSE'], cv=3, verbose=False)

{'test_rmse': array([0.5696805 , 0.57078035, 0.57125664]),
 'fit_time': (64882.5064394474, 61108.732347011566, 58451.17490911484),
 'test_time': (157.06523060798645, 157.74752831459045, 153.9112446308136)}

In [13]:
trainset, testset = train_test_split(data, test_size=0.25)
algo = SVDpp(n_factors=150, n_epochs=75, lr_all=0.01, reg_all=0.1)
predictions = algo.fit(trainset).test(testset)
accuracy.rmse(predictions)

RMSE: 0.5718


0.5718358448010138

In [22]:
algo = NormalPredictor()
predictionsnormal = algo.fit(trainset).test(testset)
accuracy.rmse(predictionsnormal)

RMSE: 0.9570


0.9569947792018437

The tuned SVDpp model outperforms a NormalPredictor algorthim by over 4%

In [19]:
# Create function to evaluate predictions and get number of items rated by each user as well as number of ratings per beer

def get_items_rated(uid):
    try:
        return len(trainset.ur[trainset.to_inner_uid(uid)])
    except ValueError: # user was not part of the trainset
        return 0
    
def get_num_ratings(iid):
    try: 
        return len(trainset.ir[trainset.to_inner_iid(iid)])
    except ValueError:
        return 0
    
dfpred = pd.DataFrame(predictions, columns=['review_profilename', 'beer_name', 'rui', 'est', 'details'])
dfpred['items_rated'] = dfpred.review_profilename.apply(get_items_rated)
dfpred['num_ratings'] = dfpred.beer_name.apply(get_num_ratings)
dfpred['error'] = abs(dfpred.est - dfpred.rui)
best_predictions = dfpred.sort_values(by='error')[:10]
worst_predictions = dfpred.sort_values(by='error')[-10:]

In [20]:
best_predictions

Unnamed: 0,review_profilename,beer_name,rui,est,details,items_rated,num_ratings,error
139103,oteyj,The Abyss,5.0,5.0,{'was_impossible': False},55,795,0.0
260452,oteyj,Cantillon Crianza Helena,5.0,5.0,{'was_impossible': False},55,48,0.0
211923,oteyj,Supplication,5.0,5.0,{'was_impossible': False},55,654,0.0
204248,oteyj,Trappist Westvleteren 8,5.0,5.0,{'was_impossible': False},55,440,0.0
262703,whartontallboy,Uerige Altbier (Classic),4.0,3.999997,{'was_impossible': False},241,129,3e-06
140207,mikesgroove,The Reverend,4.0,4.000004,{'was_impossible': False},2227,448,4e-06
205873,brewandbbq,Corne De Brume,4.0,4.000006,{'was_impossible': False},507,39,6e-06
181089,brentk56,J.W. Lees Harvest Ale (Port Cask),4.0,4.000007,{'was_impossible': False},1800,106,7e-06
144698,Foxman,Allagash Fluxus 2007,4.0,4.000008,{'was_impossible': False},541,40,8e-06
178598,kbub6f,Prohibition Ale,4.0,4.000009,{'was_impossible': False},395,184,9e-06


In [25]:
worst_predictions

Unnamed: 0,review_profilename,beer_name,rui,est,details,items_rated,num_ratings,error
188593,aaronh,Drie Fonteinen Oude Geuze,1.0,4.19701,{'was_impossible': False},406,323,3.19701
138373,dasenebler,YuleSmith (Summer),1.0,4.241274,{'was_impossible': False},352,569,3.241274
88273,rvdoorn,Darkness,1.0,4.274347,{'was_impossible': False},197,391,3.274347
14188,rye726,Uerige Doppelsticke,1.0,4.277991,{'was_impossible': False},732,244,3.277991
228632,EssexAleMan,Hardcore IPA (2nd Ed. 9.2%),1.0,4.353312,{'was_impossible': False},62,64,3.353312
89760,ChrisCage,La Fin Du Monde,1.0,4.3562,{'was_impossible': False},123,1438,3.3562
32019,brdc,Sinners Blend 2008,1.0,4.374767,{'was_impossible': False},611,39,3.374767
206510,jfitzy78,Fantôme Brise-BonBons,1.0,4.408655,{'was_impossible': False},35,105,3.408655
32728,rvdoorn,Pliny The Elder,1.0,4.479191,{'was_impossible': False},197,1248,3.479191
190299,madtappers,The Dissident,1.0,4.522709,{'was_impossible': False},33,231,3.522709


In [26]:
from collections import defaultdict

def get_top_n(predictions, n=10):

    # map preictions to each user.
    top_n = defaultdict(list)
    for review_profilename, beer_name, true_r, est, _ in predictions:
        top_n[review_profilename].append((beer_name, est))

    # sort predictions for each user and retrieve the k highest ones.
    for review_profilename, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[review_profilename] = user_ratings[:n]

    return top_n

In [28]:
top_ratings = get_top_n(predictions)

In [31]:
top_ratings['whartontallboy']

[('St. Bernardus Abt 12', 4.290482442303683),
 ('Alpha King Pale Ale', 4.26160114639165),
 ('Gumballhead', 4.256343229014798),
 ('Founders Breakfast Stout', 4.240451085670301),
 ('Southampton Saison', 4.238378760158777),
 ('Péché Mortel (Imperial Stout Au Cafe)', 4.181327170748399),
 ('YuleSmith (Summer)', 4.180392408293553),
 ("Samuel Smith's Oatmeal Stout", 4.1580391159394585),
 ('Southampton Grand Cru', 4.154582216869394),
 ('Tripel Karmeliet', 4.145165485014519)]