# Application of the epsilon greedy algorithm to beer recommendation methods

In [1]:
import os
os.chdir('C:/Users/Raphael/Dropbox/Bandit Algo/') #Select your working directory
cwd = os.getcwd()
import pandas as pd
import numpy as np
import random

## Creation of the Matrix<sub>nbBeers x nbUsers</sub> : grade given from each user for each beer.

In [3]:
df = pd.read_csv("beer_reviews.csv")
# Limit to the top 250 beers
top_n = df.beer_name.value_counts().index[:250]
df = df[df.beer_name.isin(top_n)]

# Create a matrix of beer vs. reviews; values are overall review score
df_wide = pd.pivot_table(df, values=["review_overall"],
        index=["beer_name", "review_profilename"],
        aggfunc=np.mean).unstack()
df_wide = df_wide.fillna(0)
df_wide.columns=[x[1] for x in list(df_wide.columns.values)]

print(df_wide.head())

#Split into train and test

#The train will be used to train our differents recommendation models
df_wide_train=df_wide.iloc[:,:int(np.floor(df_wide.shape[1]/2))]

#The test will be used to simulate different users reacting to the recommandation they are given and to compute the reward
df_wide_test=df_wide.iloc[:,int(np.floor(df_wide.shape[1]/2)):]

                            0110x011  02maxima  03SVTCobra  05Harley  \
beer_name                                                              
#9                               0.0       0.0         0.0       0.0   
120 Minute IPA                   0.0       0.0         0.0       4.0   
1554 Enlightened Black Ale       0.0       0.0         0.0       0.0   
60 Minute IPA                    0.0       0.0         0.0       0.0   
90 Minute IPA                    5.0       0.0         0.0       4.0   

                            0Naught0  0beerguy0  0runkp0s  0tt0  1000Bottles  \
beer_name                                                                      
#9                               0.0        0.0       0.0   0.0          0.0   
120 Minute IPA                   0.0        0.0       0.0   1.5          0.0   
1554 Enlightened Black Ale       0.0        0.0       0.0   0.0          0.0   
60 Minute IPA                    0.0        0.0       0.0   0.0          0.0   
90 Minute IPA  

## We consider here 3 recommendation methods, based on 3 different distance matrices

In [7]:
from sklearn.metrics.pairwise import (
    euclidean_distances, cosine_similarity, pairwise_distances
)

# Calculate euclidean distance between each beer
eucl_dists = euclidean_distances(df_wide_train)
eucl_dists = pd.DataFrame(eucl_dists, columns=df_wide_train.index)
eucl_dists.index = eucl_dists.columns

# Calculate cosine similarity
cos_dists = cosine_similarity(df_wide_train)
cos_dists = pd.DataFrame(cos_dists, columns=df_wide_train.index)
cos_dists.index = cos_dists.columns

# Calculate distance correlation
corr_dists = pairwise_distances(df_wide_train,metric='correlation')
corr_dists = pd.DataFrame(corr_dists, columns=df_wide_train.index)
corr_dists.index = corr_dists.columns

# Use distance matrix to determine similarity
def get_sims(products, dists):
    """Return similarity matrix"""
    p = dists[products].apply(lambda row: np.sum(row), axis=1)
    boo=all(cos_dists==dists)
    p = p.sort_values(ascending=boo)
    return p.index[p.index.isin(products)==False]

## Let us look at a little example on how it works

In [8]:
products = ["Sierra Nevada Pale Ale", "120 Minute IPA", "Coors Light"]

eucl_prods = get_sims(products,eucl_dists)[:10]
cos_prods = get_sims(products,cos_dists)[:10]
corr_prods = get_sims(products,corr_dists)[:10]

print("Products similar to:", ', '.join(products))
pd.DataFrame({'Euclidean Distance': eucl_prods,
        "Cosine Similarity":cos_prods,
        "Distance Correlation":corr_prods})

Products similar to: Sierra Nevada Pale Ale, 120 Minute IPA, Coors Light


Unnamed: 0,Cosine Similarity,Distance Correlation,Euclidean Distance
0,Founders Porter,Samuel Adams Boston Lager,Samuel Adams Cranberry Lambic
1,Vanilla Porter,Sierra Nevada Celebration Ale,Bud Light
2,Raging Bitch Belgian-Style IPA,HopDevil Ale,Corona Extra
3,Terrapin Coffee Oatmeal Imperial Stout,Stone IPA (India Pale Ale),Miller Lite
4,The Abyss,60 Minute IPA,Heineken Lager Beer
5,Supplication,Samuel Adams Summer Ale,Long Hammer IPA
6,Founders Backwoods Bastard,Arrogant Bastard Ale,Shiner Bock
7,New Holland Dragon's Milk Oak Barrel Ale,Anchor Steam Beer,Red Stripe Jamaican Lager
8,Furious,Stone Ruination IPA,Samuel Adams Cherry Wheat
9,Creme Brulee (Imperial Milk Stout),Samuel Adams Winter Lager,Snake Dog IPA


## We can now define the different classes corresponding to the different recommendation methods

In [26]:
class EuclideanBeerRec():
    def __init__(self,top_k):
        self.beers=top_k
        
    def execute(self):
        beers=self.beers
        suggested_beers = get_sims(beers,eucl_dists)[:10]
        result = []
        for beer in suggested_beers:
            result.append(beer)
        return result

class CosineBeerRec():
    def __init__(self,top_k):
        self.beers=top_k

    def execute(self):
        beers=self.beers
        suggested_beers = get_sims(beers,cos_dists)[:10]
        result = []
        for beer in suggested_beers:
            result.append(beer)
        return result

class CorrelationBeerRec():
    def __init__(self,top_k):
        self.beers=top_k

    def execute(self):
        beers=self.beers
        suggested_beers = get_sims(beers,corr_dists)[:10]
        result = []
        for beer in suggested_beers:
            result.append(beer)
        return result

## Below is the code for the espilon greedy algorithm

In [9]:
class EpsilonGreedy(object):
    def __init__(self,n_arms,epsilon_decay=50):
        self.counts = [0] * n_arms  # example: number of views
        self.values = [0.] * n_arms # example: number of clicks / views
        self.decay = epsilon_decay
        self.n = n_arms

    def choose_arm(self):
        """Choose an arm for testing"""
        epsilon = self.get_epsilon()
        if np.random.random() > epsilon:
            # Exploit (use best arm)
            return np.argmax(self.values)
        else:
            # Explore (test all arms)
            return np.random.randint(self.n)

    def update(self,arm,reward):
        """Update an arm with some reward value""" # Example: click = 1; no click = 0
        self.counts[arm] = self.counts[arm] + 1
        n = self.counts[arm]
        value = self.values[arm]
        # Running product
        new_value = ((n - 1) / float(n)) * value + (1 / float(n)) * reward
        self.values[arm] = new_value

    def get_epsilon(self):
        """Produce epsilon"""
        total = np.sum(self.counts)
        return float(self.decay) / (total + float(self.decay))

## Now, we have to get the k best rated beers by user (here we choose 4)

In [14]:
list_beer=list(df_wide_test.index.values)

def get_top_k(row,ind,k):
    return([ind[x] for x in row.argsort()[-k:][::-1]])

df_wide_test=df_wide_test.transpose()

df_wide_test["top_k"]=df_wide_test.apply(get_top_k,ind=list_beer,k=4,axis=1)

In order to define a reward in our case, we will consider the k-1 favorite beers of an user. We will then predict 10 beers to recommend and see whether the k<sup>th</sup> is in the 10 beers, mainly focusing in its ranking. We will then define a score between 0 and 1 based on the function f given in the code below:

In [10]:
def f(x):
    return((1 - (x / 9.)) ** 2)

def get_reward(fav,pred):
    x= [i for i,x in enumerate(pred) if x == fav]
    if x!=[]:
        return(f(x[0]))
    else:
        return(0)

## Execution of the bandit algorithm

In [29]:
arms = ["EuclieanBeerRec","CosineBeerRec","CorrelationBeerRec"]
classes=[EuclideanBeerRec,CosineBeerRec,CorrelationBeerRec]
bandit = EpsilonGreedy(len(arms))
for x in list(df_wide_test.index.values):
    user=x
    top_k=df_wide_test["top_k"].loc[user]
    top_k_minus_one=top_k[:-1]
    fav=top_k[-1]
    
    arm = bandit.choose_arm()
    
    arm_name = arms[arm]
    
    pred = classes[arm](top_k_minus_one).execute()
    
    bandit.update(arm,get_reward(fav,pred))

print(bandit.values)
best=arms[bandit.values.index(max(bandit.values))]
print("Using the epsilon greedy algorithm, the best recommendation method is "+best)

[0.022365776629342488, 0.0, 0.03328798453229328]
Using the epsilon greedy algorithm, the best recommendation method is CorrelationBeerRec
