# Beer Recommender in Python
[Example App](http://beers.yhathq.com/)

In [2]:
import getpass
import os

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

### Load in data

In [3]:
# Location of the beer reviews CSV. Set the BEER_REVIEWS_CSV environment
# variable to point at your own copy; when it is unset we fall back to the
# original author's local path so existing behaviour is unchanged.
filename = os.environ.get(
    "BEER_REVIEWS_CSV",
    "/Users/glamp/Dropbox-Yhat/yhat-box/datasets/beer_reviews/beer_reviews.csv",
)
df = pd.read_csv(filename)
# let's limit things to the top 250 beers by number of reviews
# (value_counts sorts by frequency, so the first n labels are the most reviewed)
n = 250
top_n = df.beer_name.value_counts().index[:n]
df = df[df.beer_name.isin(top_n)]

# single-argument print(...) behaves the same on Python 2 and Python 3
print(df.head())
print("melting...")

      brewery_id             brewery_name  review_time  review_overall  \
798         1075  Caldera Brewing Company   1212201268             4.5   
1559       11715  Destiny Brewing Company   1137124057             4.0   
1560       11715  Destiny Brewing Company   1129504403             4.0   
1563       11715  Destiny Brewing Company   1137125989             3.5   
1564       11715  Destiny Brewing Company   1130936611             3.0   

      review_aroma  review_appearance review_profilename  \
798            4.5                  4             grumpy   
1559           3.5                  4    blitheringidiot   
1560           2.5                  4        NeroFiddled   
1563           3.0                  4    blitheringidiot   
1564           3.0                  3             Gavage   

                            beer_style  review_palate  review_taste  \
798   American Double / Imperial Stout            4.0           4.5   
1559           American Pale Ale (APA)            3.

### Create similarity matrix

In [4]:
# Build a beer x reviewer table holding the mean overall rating each reviewer
# gave each beer. The grouping keys are passed positionally on purpose: the
# keyword for them was renamed from `rows` to `index` in pandas 0.14 and
# `rows` was later removed, while the positional form works on both.
df_wide = pd.pivot_table(df, ["review_overall"],
                         ["beer_name", "review_profilename"],
                         aggfunc=np.mean).unstack()

# any cells that are missing data (i.e. a user didn't review a particular beer)
# we're going to set to 0
df_wide = df_wide.fillna(0)

# this is the key. we're going to use cosine_similarity from scikit-learn
# to compute the similarity between every pair of beers
# (higher values mean more similar, despite the `dists` name kept below)
print("calculating similarity")
dists = cosine_similarity(df_wide)

# stuff the similarity matrix into a dataframe so it's easier to operate on,
# labelling both the rows and the columns with the beer names
dists = pd.DataFrame(dists, index=df_wide.index, columns=df_wide.index)
dists.head()

calculating similarity




beer_name,#9,120 Minute IPA,1554 Enlightened Black Ale,60 Minute IPA,90 Minute IPA,Aecht Schlenkerla Rauchbier Märzen,AleSmith IPA,AleSmith Speedway Stout,Allagash White,Alpha King Pale Ale,...,Vanilla Porter,Weihenstephaner Hefeweissbier,Weihenstephaner Korbinian,Westmalle Trappist Dubbel,Westmalle Trappist Tripel,World Wide Stout,Yeti Imperial Stout,Youngs Double Chocolate Stout,Yuengling Traditional Lager,Éphémère (Apple)
beer_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
#9,1.0,0.275405,0.274103,0.388364,0.365175,0.253841,0.228479,0.227612,0.340681,0.293315,...,0.26657,0.312395,0.276463,0.233554,0.276763,0.286534,0.299032,0.32928,0.348058,0.312499
120 Minute IPA,0.275405,1.0,0.251519,0.378258,0.410366,0.262425,0.315971,0.337541,0.282273,0.336796,...,0.201428,0.312193,0.28232,0.2708,0.301144,0.418214,0.337978,0.285483,0.233014,0.280248
1554 Enlightened Black Ale,0.274103,0.251519,1.0,0.319887,0.314028,0.252486,0.266866,0.261761,0.260275,0.307296,...,0.285846,0.300474,0.292369,0.265445,0.271656,0.262771,0.295029,0.316295,0.225219,0.273763
60 Minute IPA,0.388364,0.378258,0.319887,1.0,0.533042,0.316928,0.312343,0.307627,0.360975,0.385249,...,0.285143,0.413405,0.329941,0.308774,0.355926,0.358224,0.391041,0.39984,0.326916,0.339324
90 Minute IPA,0.365175,0.410366,0.314028,0.533042,1.0,0.312861,0.344218,0.358754,0.356804,0.418582,...,0.262775,0.436398,0.343738,0.333099,0.387312,0.405116,0.414385,0.395031,0.301877,0.332292


### Helper function for calculations

In [8]:
def get_sims(products, distances=None):
    """
    Rank the products most similar to a set of products.

    For every row of the matrix, the scores in the columns named by
    `products` are summed; the row labels are then returned from the highest
    total to the lowest, with the query products themselves removed.

    products  - a product label, or a list of labels, present in the columns
                of the matrix
    distances - optional similarity DataFrame whose index and columns are
                expected to carry the same product labels; when omitted the
                module-level `dists` matrix is used
    returns   - a pandas Index of the remaining product labels, most similar
                first
    """
    if distances is None:
        distances = dists
    # a bare label would select a Series rather than a DataFrame, so wrap it
    if np.isscalar(products):
        products = [products]
    totals = distances[products].sum(axis=1)
    # Series.order was superseded by sort_values in pandas 0.17 and later
    # removed; use whichever method this pandas version provides
    sort_desc = getattr(totals, "sort_values", None) or totals.order
    ranked = sort_desc(ascending=False)
    return ranked.index[~ranked.index.isin(products)]


# Try the helper on a two-beer basket; the cell displays the ranked Index.
get_sims([
    "Sierra Nevada Pale Ale",
    "120 Minute IPA",
])

Index([u'Sierra Nevada Celebration Ale', u'90 Minute IPA', u'60 Minute IPA', u'Stone Ruination IPA', u'Stone IPA (India Pale Ale)', u'Sierra Nevada Bigfoot Barleywine Style Ale', u'HopDevil Ale', u'Arrogant Bastard Ale', u'Samuel Adams Boston Lager', u'Storm King Stout', u'Old Rasputin Russian Imperial Stout', u'Double Bastard Ale', u'Brooklyn Black Chocolate Stout', u'Prima Pils', u'Two Hearted Ale', u'Hop Rod Rye', u'Hop Wallop', u'Ayinger Celebrator Doppelbock', u'La Fin Du Monde', u'Stone Imperial Russian Stout', u'Oaked Arrogant Bastard Ale', u'St. Bernardus Abt 12', u'Duvel', u'Anchor Steam Beer', u'India Pale Ale', u'Stone Smoked Porter', u'Hennepin (Farmhouse Saison)', u'Racer 5 India Pale Ale', u'Dead Guy Ale', u'Schneider Aventinus', u'Indian Brown Ale', u'World Wide Stout', u'Chocolate Stout', u'ApriHop', u'Samuel Smiths Oatmeal Stout', u'Shakespeare Oatmeal Stout', u'Samuel Adams Winter Lager', u'Chimay Grande Réserve (Blue)', u'Anchor Liberty Ale', u'Weihenstephaner Hefewe

### ScienceOps deployment

In [10]:
from yhat import Yhat, YhatModel, preprocess


class BeerRecommender(YhatModel):
    """YhatModel subclass that answers requests using get_sims."""

    @preprocess(in_type=dict, out_type=dict)
    def execute(self, data):
        """
        Read the "beers" entry of the request dict, rank similar beers with
        get_sims, and return them as a list of {"beer": name} records in
        ranked order.
        """
        ranked = get_sims(data.get("beers"))
        return [{"beer": name} for name in ranked]

# Credentials are prompted for at run time and must never be written into the
# notebook source. The API key is read with getpass so that it is not echoed
# into the saved cell output.
yh = Yhat(raw_input("Yhat username: "), getpass.getpass("Yhat apikey: "), "https://sandbox.yhathq.com/")
# deploy() is handed globals() so the model's helpers and data travel with it
print(yh.deploy("BeerRecommender", BeerRecommender, globals()))

Yhat username: greg
Yhat apikey: <redacted>
Are you sure you want to deploy? (y/N): y


Transfering Model: |############################|100% Time: 00:00:02 196.24 K/s


extracting model
{'status': 'OK', 'message': 'Model successfully uploaded. Your model will begin building momentarily. Please see https://sandbox.yhathq.com/ for more details'}


In [6]:
# Query the deployed model with a two-beer basket; the response is displayed.
yh.predict(
    "BeerRecommender",
    {"beers": ["Sierra Nevada Pale Ale", "120 Minute IPA"]},
)

{u'result': [{u'beer': u'Sierra Nevada Celebration Ale'},
  {u'beer': u'90 Minute IPA'},
  {u'beer': u'60 Minute IPA'},
  {u'beer': u'Stone Ruination IPA'},
  {u'beer': u'Stone IPA (India Pale Ale)'},
  {u'beer': u'Sierra Nevada Bigfoot Barleywine Style Ale'},
  {u'beer': u'HopDevil Ale'},
  {u'beer': u'Arrogant Bastard Ale'},
  {u'beer': u'Samuel Adams Boston Lager'},
  {u'beer': u'Storm King Stout'},
  {u'beer': u'Old Rasputin Russian Imperial Stout'},
  {u'beer': u'Double Bastard Ale'},
  {u'beer': u'Brooklyn Black Chocolate Stout'},
  {u'beer': u'Prima Pils'},
  {u'beer': u'Two Hearted Ale'},
  {u'beer': u'Hop Rod Rye'},
  {u'beer': u'Hop Wallop'},
  {u'beer': u'Ayinger Celebrator Doppelbock'},
  {u'beer': u'La Fin Du Monde'},
  {u'beer': u'Stone Imperial Russian Stout'},
  {u'beer': u'Oaked Arrogant Bastard Ale'},
  {u'beer': u'St. Bernardus Abt 12'},
  {u'beer': u'Duvel'},
  {u'beer': u'Anchor Steam Beer'},
  {u'beer': u'India Pale Ale'},
  {u'beer': u'Stone Smoked Porter'},
  {u