# Prepare Data for Mapping Purpose 

# Visualization

### Note: The cell below adds a button that shows or hides the notebook's code cells

In [36]:
from IPython.display import HTML

# Emit a <script> plus a submit button as the cell's HTML output.
# - code_toggle() hides or shows every element matching `div.input` and then
#   flips the `code_show` flag.
# - It is registered on document-ready, so it runs once on load; because
#   code_show starts as true, that first run hides the inputs.
# - The script uses `$`, so it needs jQuery to be available on the page.
# NOTE(review): presumably written for the classic Notebook front end, where
# code inputs are `div.input` — confirm in the UI you use.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')#

## Pygal
Pygal was introduced in class. To use this package, download it from https://pypi.python.org/pypi/pygal and install it locally.

In [12]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pygal as pg
from IPython.display import display, HTML
from pygal.style import DarkStyle, NeonStyle, BlueStyle, DarkGreenStyle, LightColorizedStyle

# HTML page template used to show a pygal chart inside the notebook.
# It loads the two pygal.js scripts and leaves a single `{rendered_chart}`
# placeholder that is filled with the chart's SVG markup via str.format().
# The template must not contain any other `{`/`}` characters, otherwise
# str.format() would try to interpret them as placeholders.
base_html = """
<!DOCTYPE html>
<html>
  <head>
  <script type="text/javascript" src="http://kozea.github.com/pygal.js/javascripts/svg.jquery.js"></script>
  <script type="text/javascript" src="https://kozea.github.io/pygal.js/2.0.x/pygal-tooltips.min.js"></script>
  </head>
  <body>
    <figure>
      {rendered_chart}
    </figure>
  </body>
</html>
"""

def galplot(chart):
    """Show a chart in the notebook output.

    The chart is rendered with ``chart.render(is_unicode=True)``, the result
    is substituted into the ``base_html`` template, and the filled template
    is displayed as HTML. Nothing is returned.
    """
    svg_markup = chart.render(is_unicode=True)
    page = base_html.format(rendered_chart=svg_markup)
    display(HTML(page))
    
# Load the restaurant table. The location can be overridden by setting the
# RESTAURANT_CSV environment variable; when it is not set, the path that was
# originally hard-coded here is used, so existing setups keep working.
data = pd.read_csv(os.environ.get(
    "RESTAURANT_CSV",
    "/Users/hetianbai/Desktop/1007_Yelp_Project-master/Restaurant_df_1007.csv",
))

## Plot average rating and price by categories

In [13]:
from pygal.style import Style


# Convert the "$"-style price strings to a number (the length of the string).
# The guard makes the cell safe to re-run: once the column is numeric, the
# .str accessor would raise, so the conversion is skipped.
if not pd.api.types.is_numeric_dtype(data['price']):
    data['price'] = data['price'].str.len()

# Keep only the columns the charts need; copy so that later work on
# clean_data never writes through to `data`.
index = ['cat', 'price', 'rating']
clean_data = data[index].copy()

# Mean price and mean rating per category (one row per category).
avg_data = clean_data.groupby('cat').mean()


# Interactive Bar Chart
custom_style = Style(label_font_size = 15.0, major_label_font_size = 15)

# Display names for the category keys. Labels are looked up from the actual
# group order, so each bar always sits under its own category; an unknown key
# falls back to the raw key.
category_names = {
    'chinese': 'Chinese',
    'french': 'French',
    'indpak': 'Indian',
    'italian': 'Italian',
    'japanese': 'Japanese',
    'korean': 'Korean',
    'mexican': 'Mexican',
}

ibar_chart = pg.Bar(x_title = 'Restaurant Categories', style = custom_style)
ibar_chart.title = 'Restaurants'
ibar_chart.x_labels = [category_names.get(cat, cat) for cat in avg_data.index]

# One series per averaged column (price, rating), passed as plain lists like
# the other charts in this notebook.
for column in avg_data.columns:
    ibar_chart.add(column, list(avg_data[column]))

galplot(ibar_chart)   ### display pygal in Jupyter notebook

# Also save the chart as an SVG file in the working directory.
ibar_chart.render_to_file('chart.svg')

## Plot rating overall by price

### Higher prices generally lead to higher ratings

In [16]:
# Mean rating for each price level.
# The rating column is selected explicitly: clean_data also holds the string
# column 'cat', and averaging it raises a TypeError on pandas >= 2.0 (older
# versions silently dropped it). The result is still a one-column DataFrame
# named 'rating', indexed by price.
p_r = clean_data.groupby('price')[['rating']].mean()
p_r

Unnamed: 0_level_0,rating
price,Unnamed: 1_level_1
1.0,3.76475
2.0,3.714518
3.0,3.838983
4.0,3.977477


In [17]:
# Bar chart of the mean rating per price level, taken from p_r.
custom_style = Style(major_label_font_size=15, label_font_size=15.0)

bar_chart = pg.Bar(
    style=custom_style,
    title=u'Rating by Price',
    x_title='Price',
    y_title='Rating',
    x_label_rotation=20,
)

# Four fixed labels, one per price level.
# NOTE(review): assumes p_r holds exactly the four levels in ascending order.
bar_chart.x_labels = ('$', '$$', '$$$', '$$$_$')
bar_chart.add("rating", p_r['rating'].tolist())

galplot(bar_chart)

## Plot price overall by rating

No obvious trend 

In [18]:
# Mean price level for each rating value.
# The price column is selected explicitly: clean_data also holds the string
# column 'cat', and averaging it raises a TypeError on pandas >= 2.0 (older
# versions silently dropped it). The result is still a one-column DataFrame
# named 'price', indexed by rating.
r_p = clean_data.groupby('rating')[['price']].mean()
r_p


Unnamed: 0_level_0,price
rating,Unnamed: 1_level_1
0.0,1.777778
1.0,1.0
1.5,2.0
2.0,1.674419
2.5,1.745665
3.0,1.781818
3.5,1.879542
4.0,1.930999
4.5,1.819885
5.0,1.72093


In [19]:
# Show the index of r_p, i.e. the distinct rating values it was grouped by.
r_p.index

Float64Index([0.0, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0], dtype='float64', name='rating')

In [20]:
# Bar chart of the mean price level per rating value, taken from r_p.
custom_style = Style(major_label_font_size=15, label_font_size=15.0)

bar_chart = pg.Bar(
    style=custom_style,
    title=u'Price by rating',
    x_title='Rating',
    y_title='Price',
)

# Labels and values both come from r_p, so they stay in step.
bar_chart.x_labels = r_p.index.tolist()
bar_chart.add("Price", r_p['price'].tolist())

galplot(bar_chart)

## SolidGauge

In [21]:
# Number of rows in clean_data for each rating value.
clean_data.groupby('rating').size()

rating
0.0     161
1.0      18
1.5      15
2.0      60
2.5     190
3.0     583
3.5    1528
4.0    2020
4.5     747
5.0     179
dtype: int64

In [22]:

# Solid gauge showing each price level's share of all restaurants.
# The counts below are hard-coded rather than computed in this cell.
# NOTE(review): 5501 equals the sum of the per-rating counts printed by the
# previous cell; the per-price counts are presumably from the same data — confirm.
gauge_total = 5501
gauge_counts = [
    ('price$/total', 1339),
    ('price$$/total', 3072),
    ('price$$$/total', 472),
    ('price$$$_$/total', 111),
]

gauge = pg.SolidGauge(inner_radius=0.70)
for gauge_label, gauge_value in gauge_counts:
    gauge.add(gauge_label, [{'value': gauge_value, 'max_value': gauge_total}])

# Disabled alternative: the same gauge by rating bucket, out of 5501:
#   'rating [0,1)/total' -> 161          'rating [3,4)/total' -> 583 + 1528
#   'rating [1,2)/total' -> 18 + 15      'rating [4,5)/total' -> 2020 + 747
#   'rating [2,3)/total' -> 60 + 190     'rating 5/total'     -> 179

galplot(gauge)

## Average Rating by Categories by price

In [23]:
# Mean rating for every (category, price level) pair, flattened into a
# DataFrame with the columns cat, price and rating.
r_cp = (
    clean_data
    .groupby(['cat', 'price'])['rating']
    .mean()
    .reset_index()
)

In [24]:
# Grouped bar chart: mean rating per category, one series per price level.
custom_style = Style(label_font_size = 15.0, major_label_font_size = 15)

# Category keys as they appear in the 'cat' column, with their display names,
# in the order the bars are drawn.
category_labels = [
    ('chinese', 'Chinese'),
    ('french', 'French'),
    ('indpak', 'Indian'),
    ('italian', 'Italian'),
    ('japanese', 'Japanese'),
    ('korean', 'Korean'),
    ('mexican', 'Mexican'),
]
category_keys = [key for key, _ in category_labels]

# One row per category, one column per price level. Building each series from
# this table (instead of filtering rows) keeps every value under its own
# category: a category with no restaurants at a price level gets a gap rather
# than shifting the remaining bars one slot to the left.
rating_table = r_cp.pivot(index='cat', columns='price', values='rating').reindex(category_keys)

bar_chart = pg.Bar(x_title = 'Restaurant Categories', y_title = 'Rating', style = custom_style)
bar_chart.title = 'Restaurants rating by categories by price'
bar_chart.x_labels = [label for _, label in category_labels]

price_series = [
    (1, 'price = $'),
    (2, 'price = $$'),
    (3, 'price = $$$'),
    (4, 'price = $$$_$'),
]
for level, series_label in price_series:
    if level in rating_table.columns:
        # pygal treats None as a missing value
        series_values = [None if pd.isnull(v) else v for v in rating_table[level]]
    else:
        series_values = [None] * len(category_keys)
    bar_chart.add(series_label, series_values)

galplot(bar_chart)

## Average Price by Categories by rating

In [25]:
# Remove the rows labelled 2839 and 3615 before averaging.
# NOTE(review): the reason for excluding these two rows is not recorded here — confirm.
new = clean_data.drop(labels=[2839, 3615], axis=0)

# Mean price level for every (category, rating) pair, flattened into a
# DataFrame with the columns cat, rating and price.
p_cr = (
    new
    .groupby(['cat', 'rating'])['price']
    .mean()
    .reset_index()
)

In [26]:
# Radar chart: mean price level per category, one series per rating value.
# (The variable keeps the name `bar_chart` used by the other chart cells.)
custom_style = Style(label_font_size = 15.0, major_label_font_size = 15)

# Category keys as they appear in the 'cat' column, with their display names,
# in the order the axes are drawn.
category_labels = [
    ('chinese', 'Chinese'),
    ('french', 'French'),
    ('indpak', 'Indian'),
    ('italian', 'Italian'),
    ('japanese', 'Japanese'),
    ('korean', 'Korean'),
    ('mexican', 'Mexican'),
]
category_keys = [key for key, _ in category_labels]

# One row per category, one column per rating. Building each series from this
# table (instead of filtering rows) keeps every value on its own axis: when a
# category has no restaurants with a given rating, that point is left out
# rather than the remaining values sliding onto the wrong categories.
price_table = p_cr.pivot(index='cat', columns='rating', values='price').reindex(category_keys)

bar_chart = pg.Radar(x_title = 'Restaurant Categories', y_title = 'Price', style = custom_style, range=(0, 3))
bar_chart.title = 'Restaurants price by categories by rating'
bar_chart.x_labels = [label for _, label in category_labels]

# '{:g}' prints 0 -> '0', 1.5 -> '1.5', 5 -> '5', matching the legend text
# 'rating = 0' ... 'rating = 5'.
for rating_value in [0, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5]:
    if rating_value in price_table.columns:
        # NaN means (no price information) become None, pygal's missing value
        series_values = [None if pd.isnull(v) else v for v in price_table[rating_value]]
    else:
        series_values = [None] * len(category_keys)
    bar_chart.add('rating = {:g}'.format(rating_value), series_values)


galplot(bar_chart)

## Trend Analysis price vs rating by restaurant categories

In [27]:
# Drop the (category, rating) rows that contain a missing value, then pair
# each remaining rating with its mean price as a (rating, price) tuple.
p_cr = p_cr.dropna()
p_cr = p_cr.assign(rating_price=p_cr[['rating', 'price']].apply(tuple, axis=1))


def _points_for(category):
    """Return the (rating, price) tuples of one category as a list."""
    return p_cr.loc[p_cr['cat'] == category, 'rating_price'].tolist()


# One list of points per category; the names are used by the XY chart cell.
chi_list = _points_for('chinese')
ita_list = _points_for('italian')
ind_list = _points_for('indpak')
mex_list = _points_for('mexican')
kor_list = _points_for('korean')
fre_list = _points_for('french')
jan_list = _points_for('japanese')


In [28]:
# XY chart of mean price against rating, one connected line per category.
custom_style = Style(label_font_size = 15.0, major_label_font_size = 15)

xy_chart = pg.XY(stroke= True, x_title = 'Rating', y_title = 'Price', style = custom_style)
xy_chart.title = 'price vs rating by categories'

# Legend label and its list of (rating, price) points, in drawing order.
# The last label was previously misspelled 'janpanese'.
category_series = [
    ('chinese', chi_list),
    ('italian', ita_list),
    ('indian', ind_list),
    ('mexican', mex_list),
    ('korean', kor_list),
    ('french', fre_list),
    ('japanese', jan_list),
]
for series_label, series_points in category_series:
    xy_chart.add(series_label, series_points)

galplot(xy_chart)