In [1]:
# Package imports
from __future__ import print_function # for ipywidgets
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
from scipy import sparse

from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity, pairwise_distances

import ipywidgets as wg

from ipywidgets import interact, interactive, fixed, interact_manual, widgets, interact, interact_manual, fixed
from IPython.display import display, clear_output, HTML, Javascript

pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_rows', None)

In [2]:
# Load in pickled list of beers
with open('./data/beerlist.pickle', 'rb') as f:
    beerlist = pickle.load(f)
    
beerlist = [beers.lower() for beers in beerlist]

In [3]:
# Load in pickled list
with open('./data/beer_review_pivot_large.pickle', 'rb') as f:
    beerreviews = pickle.load(f)
    
# Load in cleaned database of beer data
full_data = pd.read_csv('./data/dmv_beer_database_extra_categories.csv')
reviews_plus = pd.merge(left=beerreviews, right=full_data, left_on=beerreviews.index, right_on=full_data['name'])
reviews_plus = reviews_plus.drop(columns=['key_0'])

In [6]:
# Widget Functionality

brewery_list = [_ for _ in reviews_plus['brewery'].unique()]

brewery_widget = widgets.Dropdown(
    options=brewery_list,
    description='Brewery:',
    disabled=False,
    width='automatic')

beerlabel_widget = widgets.Dropdown(
    options=beerlist,
    description='Label:')

state_widget = widgets.SelectMultiple(
    options=['DC','MD','VA'],
    value=['DC','MD','VA'],
    description='Regions',
    disabled=False)

rating_widget = widgets.FloatRangeSlider(
    value=[0, 5],
    min=0,
    max=5.0,
    step=0.1,
    description='Beer Ratings:',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='.1f')

prediction_style_widget = widgets.ToggleButtons(
    options=['Beer Attributes', 'Review Text', 'Collaborative'],
    description='Rec. Type:',
    value = 'Review Text',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    tooltips=['Taking into style, ABV, number of reviews, etc.', 
              'Comparing review text', 'Beers similarly enjoyed by users'])

def update_beerlist(*args):
    beerlabel_widget.options = [_ for _ in reviews_plus[reviews_plus['brewery'] == brewery_widget.value]['name']]

instructions = widgets.HTML(value="<h3><b>Choose a beer and I'll find you something similar!</b><h4>")

filter_widget = widgets.HTML(value="<h4><b>Filters for your recommendations:</b><h4>")

# brewery_widget.observe(update_beerlist)

display(instructions)
# display(brewery_widget)
display(beerlabel_widget)

display(filter_widget)
display(state_widget)
display(rating_widget)
display(prediction_style_widget)

def run_all(ev):
    display(Javascript('IPython.notebook.execute_cells_below()'))

button = widgets.Button(description="Beer Me!", )
print("")
display(button)
button.on_click(run_all)

<IPython.core.display.Javascript object>

HTML(value="<h3><b>Choose a beer and I'll find you something similar!</b><h4>")

Dropdown(description='Label:', options=('3 stars #ultrafresh', "3 stars 'bout that life", '3 stars / aslin fli…

HTML(value='<h4><b>Filters for your recommendations:</b><h4>')

SelectMultiple(description='Regions', index=(0, 1, 2), options=('DC', 'MD', 'VA'), value=('DC', 'MD', 'VA'))

FloatRangeSlider(value=(0.0, 5.0), continuous_update=False, description='Beer Ratings:', max=5.0, readout_form…

ToggleButtons(description='Rec. Type:', index=1, options=('Beer Attributes', 'Review Text', 'Collaborative'), …




Button(description='Beer Me!', style=ButtonStyle())

In [7]:
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()">
<input type="submit" value="Click here to toggle on/off the raw code."></form>''')

In [8]:
# def make_basic_content_rec_df(content_matrix=dmv_beer_basic_content):
    
#     # Instantiate scaler
#     sc = StandardScaler()
    
#     # Fit and transfrom
#     dmv_beer_content_scaled = sc.fit_transform(content_matrix.todense())
    
#     # Calculate cosine similarity matrix
#     basic_content_recommender = pairwise_distances(dmv_beer_content_scaled, metric='cosine')
    
#     basic_content_recommender_df = pd.DataFrame(basic_content_recommender, columns=beerlist, index=beerlist)
    
#     return basic_content_recommender_df

In [9]:
def make_NLP_content_rec_df(base, min_rating, max_rating, 
                            states, content_df=reviews_plus, 
                            max_features=None):
    
    rating_mask = (content_df['rating'] >= min_rating) & (content_df['rating'] <= max_rating)

    if len(states) == 3:
        state_mask = (content_df['state'] == 'DC') | (content_df['state'] == 'VA') | (content_df['state'] == 'MD')
    elif len(beer_regions) == 2:
        state_mask = (content_df['state'] == states[0]) | (content_df['state'] == states[1])
    else:
        state_mask = (content_df['state'] == states[0])
        
    base_mask = content_df['name'] == base
    
    beer_corpus = content_df[(rating_mask & state_mask) | base_mask]
    
    tf = TfidfVectorizer(stop_words='english', max_features=max_features)
    
    filtered_beer_reviews = tf.fit_transform(beer_corpus['review_text'])
    
    filtered_beer_list = [beer.lower() for beer in beer_corpus['name']]
    
    reviewNLP_content_recommender = pairwise_distances(filtered_beer_reviews, metric='cosine')
    
    reviewNLP_content_recommender_df = pd.DataFrame(reviewNLP_content_recommender, 
                                                    columns=filtered_beer_list, 
                                                    index=filtered_beer_list)
    
    return reviewNLP_content_recommender_df, filtered_beer_list

In [10]:
#https://medium.com/@cristhianboujon/how-to-list-the-most-common-words-from-text-corpus-using-scikit-learn-dad4d0cab41d
#used code I found on medium to create a list of the top 10 most frequent words in filtered corpus
def get_top_n_words(corpus, n=None):
    
    # List the top n words in a vocabulary according to occurrence in a text corpus.
    
    vec = CountVectorizer(stop_words='english').fit(corpus)
    bag_of_words = vec.transform(corpus)
    sum_words = bag_of_words.sum(axis=0) 
    words_freq = [(word, sum_words[0, idx]) for word, idx in vec.vocabulary_.items()]
    words_freq = sorted(words_freq, key = lambda x: x[1], reverse=True)
    
    return words_freq[:n]

In [11]:
def recommendations(recommender_df, itemlist, base):
    
    print("Here are your closest beers!")
    print("")
    print(recommender_df.loc[base.lower()].sort_values()[1:11])
    print("")
    print("")
    print("Here are the most different beers possible!")
    print("")
    print(recommender_df.loc[base.lower()].sort_values()[-10:])
    

In [12]:
# Variables to be used in model
min_rating = rating_widget.value[0]
max_rating = rating_widget.value[1]
beer_regions = state_widget.value
base_beer = beerlabel_widget.value
prediction_style = prediction_style_widget.value

# Try-except to deal with 'early' button presses before selecting a beer
try:
    base_beer = beerlabel_widget.value
except:
    print("You didn't pick a beer!")

# def beerme(b):

if prediction_style == 'Beer Attributes':
    pass
elif prediction_style == 'Review Text':
    current_beer_df = make_NLP_content_rec_df(base_beer, min_rating, max_rating, states=beer_regions)[0]
    filtered_beers = make_NLP_content_rec_df(base_beer, min_rating, max_rating, states=beer_regions)[1]
    
    recommendations(current_beer_df, filtered_beers,base_beer)

    print("")
    print("Here are some top words!")

#     filtered_list = [beer for beer in filtered_beers]
    pop_words = get_top_n_words(beerreviews.loc[filtered_beers,'review_text'], n=20)

    top_words = []

    for i in range(len(pop_words)):
        word_freq = {'word': pop_words[i][0],
                     'frequency': pop_words[i][1]}

        top_words.append(word_freq)

    topwords_df = pd.DataFrame(top_words)

    plt.figure(figsize=(10,6))
    plt.bar(topwords_df['word'][:15], topwords_df['frequency'][:15], color='#B77D20')
    plt.title("Most used words in beer reviews", size=22)
    plt.xticks(size=12, rotation=45);

else:
    pass

Here are your closest beers!

union craft double duckpin     0.431639
duclaw neon gypsy              0.443277
hardywood the great return     0.444193
alewerks bitter valentine      0.455286
3 stars pounding trees         0.458921
heavy seas the alpha effect    0.464530
champion missile ipa           0.466634
alewerks secret admirer        0.467086
3 stars ghost white ipa        0.467730
dc brau alpha domina mellis    0.468467
Name: 3 stars #ultrafresh, dtype: float64


Here are the most different beers possible!

bluejacket heart it races                              0.995906
oliver love at first sting                             0.997960
adroit theory invisible art (rum barrel aged)          0.998135
sapwood ziparillo                                      0.998169
peabody heights flying v                               1.000000
mustang sally hat trick                                1.000000
goonda o.ji. - vic secret                              1.000000
goonda o.ji. - vic secret        

KeyError: "None of [Index(['1781 belgiquiose d' or', '1781 belsnickel', '1781 conifére',\n       '1781 el jefe', '1781 kazuno maharoba', '1781 l'automne',\n       '1781 lafayette saison (version 1 - bottled)',\n       '1781 lafayette saison (version 2 - draft)', '1781 persephone',\n       '1781 von steuben kolsch',\n       ...\n       'young veterans shock diamond', 'young veterans tower buzz',\n       'young veterans truce in the forest',\n       'de lazy lizard boardwalk copper ale',\n       'de lazy lizard chasin' tail ipa', 'de lazy lizard phat puckin' porter',\n       'de lazy lizard snow job', 'de lazy lizard strong ale',\n       'de lazy lizard trophy wife blonde ale',\n       'de lazy lizard two timin' tegu double ipa'],\n      dtype='object', name='beer', length=12128)] are in the [index]"

In [None]:
print(make_NLP_content_rec_df(base_beer)[1][:20])



In [None]:
# Instantiate scaler
# sc = StandardScaler()

In [None]:
# Fit and transfrom
# dmv_beer_content_scaled = sc.fit_transform(dmv_beer_basic_content.todense())

In [None]:
# Calculate cosine similarity matrix
# basic_content_recommender = pairwise_distances(dmv_beer_content_scaled, metric='cosine')

In [None]:
# basic_content_recommender_df = pd.DataFrame(basic_content_recommender, columns=beerlist, index=beerlist)

In [None]:
# def recommender(search, recommender_df, itemlist=beerlist):
    
#     search = search.lower()
#     itemlist = [beer.lower() for beer in itemlist] # not necessary
    
#     sublist = []
    
#     for item in itemlist:
#         try:
#             if item.index(search) >= 0:
#                 sublist.append(item)
#         except:
#             continue
    
#     for i in sublist:
#         print(i)
#         print("")
#         print('10 closest beers:')
#         print("")
#         print(recommender_df.loc[i].sort_values()[1:11])
#         print('')
#         print('10 furthest beers:')
#         print("")
#         print(recommender_df.loc[i].sort_values()[-10:])
#         print('*******************************************************************************************')
#         print('')

In [None]:
# recommender('above the clouds', recommender_df=basic_content_recommender_df)

In [None]:
# # Load in pickled list
# with open('./data/beer_review_pivot_large.pickle', 'rb') as f:
#     beerreviews = pickle.load(f)

In [None]:
# tf = TfidfVectorizer(stop_words='english', max_features=None)
# cv = CountVectorizer(stop_words='english', max_features = 5_000)

In [None]:
# dmv_beer_reviews = tf.fit_transform(beerreviews['review_text'])
# dmv_beer_reviews = cv.fit_transform(beerreviews['review_text'])

In [None]:
# dmv_beer_reviews.todense()

In [None]:
# reviewNLP_content_recommender = pairwise_distances(dmv_beer_reviews, metric='cosine')

In [None]:
# reviewNLP_content_recommender

In [None]:
# reviewed_beers = [beers.lower() for beers in beerreviews.index.values]

In [None]:
# reviewNLP_content_recommender_df = pd.DataFrame(reviewNLP_content_recommender, columns=reviewed_beers, 
#                                                 index=reviewed_beers)
# reviewNLP_content_recommender_df.head()

In [None]:
# recommender('above the clouds', recommender_df=reviewNLP_content_recommender_df, itemlist=reviewed_beers)

In [None]:
# pop_words = get_top_n_words(['review_text'], n=20)

# top_words = []

# for i in range(len(pop_words)):
#     word_freq = {'word': pop_words[i][0],
#                  'frequency': pop_words[i][1]}
    
#     top_words.append(word_freq)
    
# topwords_df = pd.DataFrame(top_words)    

In [None]:
# plt.figure(figsize=(10,6))
# plt.bar(topwords_df['word'][:15], topwords_df['frequency'][:15], color='#B77D20')
# plt.title("Most used words in beer reviews", size=22)
# plt.xticks(size=12, rotation=45);

In [None]:
# full_data = pd.read_csv('./data/dmv_beer_database_extra_categories.csv')
# reviews_plus = pd.merge(left=beerreviews, right=full_data, left_on=beerreviews.index, right_on=full_data['name'])
# reviews_plus = reviews_plus.drop(columns='key_0')
# reviews_plus.head();

In [None]:
# # Filter based on rating
# beer_corpus = reviews_plus[reviews_plus['rating'] > 3.5]

In [None]:
# tf = TfidfVectorizer(stop_words='english', max_features=5_000)
# filtered_beer_reviews = tf.fit_transform(beer_corpus['review_text'])
# filtered_beer_list = [beer.lower() for beer in beer_corpus['name']]
# reviewNLP_content_recommender = pairwise_distances(filtered_beer_reviews, metric='cosine')
# reviewNLP_content_recommender_df = pd.DataFrame(reviewNLP_content_recommender, 
#                                                 columns=filtered_beer_list, 
#                                                 index=filtered_beer_list)

In [None]:
# recommender('Evolution Craft Lot #6 Double IPA', recommender_df=reviewNLP_content_recommender_df, itemlist=filtered_beer_list)

In [None]:
# # Filter based on distance?


# beer_corpus = reviews_plus[reviews_plus['rating'] > 3.5]

In [None]:
# # Widget Functionality

# brewery_list = [_ for _ in reviews_plus['brewery'].unique()]

# brewery_widget = widgets.Dropdown(
#     options=brewery_list,
#     description='Brewery:',
#     disabled=False,
#     width='automatic')

# beerlabel_widget = widgets.Dropdown(
#     description='Label:')

# state_widget = widgets.SelectMultiple(
#     options=['DC','MD','VA'],
#     value=['DC','MD','VA'],
#     description='Regions',
#     disabled=False)

# rating_widget = widgets.FloatRangeSlider(
#     value=[0, 5],
#     min=0,
#     max=5.0,
#     step=0.1,
#     description='Beer Ratings:',
#     disabled=False,
#     continuous_update=False,
#     orientation='horizontal',
#     readout=True,
#     readout_format='.1f')

# prediction_style_widget = widgets.ToggleButtons(
#     options=['Beer Attributes', 'Review Text', 'Collaborative'],
#     description='Rec. Type:',
#     value = 'Review Text',
#     disabled=False,
#     button_style='', # 'success', 'info', 'warning', 'danger' or ''
#     tooltips=['Taking into style, ABV, number of reviews, etc.', 
#               'Comparing review text', 'Beers similarly enjoyed by users'])

# def update_beerlist(*args):
#     beerlabel_widget.options = [_ for _ in reviews_plus[reviews_plus['brewery'] == brewery_widget.value]['name']]

# instructions = widgets.HTML(value="<h3><b>Choose a beer and I'll find you something similar!</b><h4>")

# filter_widget = widgets.HTML(value="<h4><b>Filters for your recommendations:</b><h4>")

# brewery_widget.observe(update_beerlist)

# display(instructions)
# display(brewery_widget)
# display(beerlabel_widget)

# display(filter_widget)
# display(state_widget)
# display(rating_widget)
# display(prediction_style_widget)

# def run_all(ev):
#     display(Javascript('IPython.notebook.execute_cells_below()'))

# button = widgets.Button(description="Beer Me!", )
# print("")
# display(button)
# button.on_click(beerme())

In [None]:
# # Variables to be used in model
# min_rating = rating_widget.value[0]
# max_rating = rating_widget.value[1]
# beer_regions = state_widget.value
# base_beer = beerlabel_widget.value
# prediction_style = prediction_style_widget.value

# # Try-except to deal with 'early' button presses before selecting a beer
# try:
#     base_beer = beerlabel_widget.value
# except:
#     print("You didn't pick a beer!")

In [None]:
def click_rec(b):
    beerme(b)
        
# button.on_click(click_rec)