# Visualizations for NYC Chain Restaurant Inspection Results

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import re, datetime

from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from bokeh.io import show, output_notebook, output_file
from bokeh.transform import dodge
from bokeh.core.properties import value
from bokeh.palettes import Spectral6

In [2]:
output_notebook()

## 1. Line Charts

In [3]:
data = pd.read_csv('camis_tidy.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,inspection_date,inspection_year,dba,camis_count
0,0,1900-01-01T00:00:00.000,1900,18TH WARD BREWPUB,1
1,1,1900-01-01T00:00:00.000,1900,1942 LOUNGE,1
2,2,1900-01-01T00:00:00.000,1900,207 BAKERY & COFFEE SHOP,1
3,3,1900-01-01T00:00:00.000,1900,23 WINEHOUSE ITALIAN BISTR0,1
4,4,1900-01-01T00:00:00.000,1900,7 TERIYAKI SUSHI,1


In [5]:
store_num = data.loc[data['inspection_year'] >= 2015]
local = store_num.loc[store_num['camis_count'] <= 100]
print(len(list(local['dba'].unique())))
local_list = list(local['dba'].unique())
local_list

20479


['RELISH CONCESSIONS (LASKER RINK)',
 'DELMY FOOD',
 'EL PATRON NIGHTCLUB CABARET-RESTAURANT',
 'VILLA BARONE',
 'OLYMPIC PIER CAFE',
 'HUNAN GLATT KOSHER',
 'SORRENTINE',
 '4O KNOTS',
 "CLAUDINE'S",
 'GRAND OASIS RESTAURANT',
 'BAR NONE',
 'MERRY LAND BUFFET',
 'BROOKLYN SWEET SPOT',
 'TERTULIA',
 'PORTALIA RISTORANTE BAR AND LOUNGE',
 'RICHMOND COUNTY COUNTRY CLUB',
 'THEATER CAFE',
 'NEW POLODOS LOUNGE',
 'NOVITA',
 "GIACOMO'S WOOD FIRED PIZZA",
 "Pinot's Pallette",
 'SABOR NORTENO',
 'RICHMOND COUNTY YACHT CLUB',
 'DESNUDA',
 'TREADWELL PARK / BLACK HOUND',
 'THE FINCH',
 'BEST WESTERN PREMIER HERALD SQUARE',
 "STEINBERG CENTER THEATER(JODI'S LOUNGE)",
 "GUSSY'S BAR",
 'FALAFEL OFF THE CORNER',
 'KNIGHTS OF COLUMBUS HOUSE',
 'SHAARE ZION CATERERS',
 'HUDSON BAR AND BOOKS',
 'HECHO EN DUMBO',
 'PHILIPPE NYC I',
 'ROYAL RIB HOUSE',
 'POK POK NY',
 'LE COLONIAL',
 'VAN LEEUWEN ARTISAN ICE CREAM',
 'GINA MEXICANA',
 'MIX',
 'AGUA ROJA BAR & LOUNGE',
 'ATWOOD KITCHEN & BAR',
 'DAVIS FAM

In [8]:
chain = store_num.loc[store_num['camis_count'] > 10]
chain_rank = chain.groupby(['dba'])['camis_count'].sum().reset_index().sort_values(['camis_count'],ascending=False)
print(len(list(chain_rank['dba'].unique())))
chain_list = list(chain_rank['dba'].unique())[:10]
chain_rank.head(10)

172


Unnamed: 0,dba,camis_count
42,DUNKIN' DONUTS,1128
144,SUBWAY,576
143,STARBUCKS,286
95,MCDONALD'S,155
76,KENNEDY FRIED CHICKEN,103
40,DOMINO'S,94
32,CROWN FRIED CHICKEN,61
54,GOLDEN KRUST CARIBBEAN BAKERY & GRILL,59
43,"DUNKIN' DONUTS, BASKIN ROBBINS",36
151,THE AINSWORTH,27


In [11]:
store_count = store_num.groupby(['inspection_year','dba'])['camis_count'].sum().reset_index()
store_count = store_count.pivot(index='inspection_year',columns='dba',values='camis_count').fillna(0)
chain = store_count[chain_list]
chain.head()

dba,DUNKIN' DONUTS,SUBWAY,STARBUCKS,MCDONALD'S,KENNEDY FRIED CHICKEN,DOMINO'S,CROWN FRIED CHICKEN,GOLDEN KRUST CARIBBEAN BAKERY & GRILL,"DUNKIN' DONUTS, BASKIN ROBBINS",THE AINSWORTH
inspection_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2015,213.0,65.0,61.0,88.0,35.0,59.0,9.0,29.0,66.0,0.0
2016,883.0,628.0,415.0,525.0,238.0,265.0,154.0,193.0,226.0,22.0
2017,1255.0,767.0,645.0,573.0,269.0,322.0,211.0,233.0,364.0,14.0
2018,1379.0,1054.0,688.0,571.0,476.0,281.0,438.0,297.0,308.0,52.0
2019,536.0,467.0,291.0,302.0,253.0,118.0,148.0,100.0,143.0,25.0


In [12]:
cds_chain = ColumnDataSource(chain)
cds_chain.column_names

['inspection_year',
 "DUNKIN' DONUTS",
 'SUBWAY',
 'STARBUCKS',
 "MCDONALD'S",
 'KENNEDY FRIED CHICKEN',
 "DOMINO'S",
 'CROWN FRIED CHICKEN',
 'GOLDEN KRUST CARIBBEAN BAKERY & GRILL',
 "DUNKIN' DONUTS, BASKIN ROBBINS",
 'THE AINSWORTH']

In [18]:
chart = figure(height = 1000, width = 1000,
               title = "Number of Locations",
               x_axis_type = "linear",
               tools = "pan,wheel_zoom,box_zoom,reset,save,zoom_in,zoom_out,lasso_select")

In [14]:
line_list = list(cds_chain.column_names)[1:10]
line_color = ['#a6cee3','#1f78b4','#b2df8a','#33a02c',
              '#fb9a99','#e31a1c','#fdbf6f','#ff7f00',
              '#cab2d6','#6a3d9a','#ffff99','#b15928',
              '#000000','#8dd3c7','#ffffb3','#bebada','#fb8072',
              '#80b1d3','#fdb462','#b3de69','#fccde5',
              '#d9d9d9','#bc80bd','#ccebc5','#ffed6f']
#if "inspection_date" in line_list: 
 #   line_list.remove("inspection_date")
print(len(line_list))
print(len(line_color))
print(line_list)

9
25
["DUNKIN' DONUTS", 'SUBWAY', 'STARBUCKS', "MCDONALD'S", 'KENNEDY FRIED CHICKEN', "DOMINO'S", 'CROWN FRIED CHICKEN', 'GOLDEN KRUST CARIBBEAN BAKERY & GRILL', "DUNKIN' DONUTS, BASKIN ROBBINS"]


In [19]:
for i in range(len(line_list)):
    chart.line(x = "inspection_year", y = line_list[i], color = line_color[i], 
               legend = value(line_list[i]), source = cds_chain)
chart.legend.click_policy = "hide"
chart.legend.location = 'top_left'
output_file('chain_line.html')
show(chart)

# 2. Checkbox and Line Chart

In [35]:
boro = pd.read_csv('boro_tidy.csv')
boro = boro.loc[boro['inspection_year'] >= 2015]
boro = boro.loc[boro['dba'].isin(chain_list)]
boro.head()

Unnamed: 0.1,Unnamed: 0,boro,inspection_year,dba,grade,camis_count
56,56,BRONX,2015,DUNKIN' DONUTS,A,21
57,57,BRONX,2015,DUNKIN' DONUTS,C,4
58,58,BRONX,2015,"DUNKIN' DONUTS, BASKIN ROBBINS",A,2
78,78,BRONX,2015,GOLDEN KRUST CARIBBEAN BAKERY & GRILL,A,3
104,104,BRONX,2015,KENNEDY FRIED CHICKEN,A,10


In [40]:
boro_group = boro.groupby(['inspection_year','boro','dba','grade'])['camis_count'].sum().reset_index()
boro_list = list(boro_group['boro'].unique())
store_list = list(boro_group['dba'].unique())
grade_list = list(boro_group['grade'].unique())
boro_list
store_list
grade_list

['A', 'C', 'B', 'P', 'Not Yet Graded', 'Z']

In [37]:
boro_group.head()

Unnamed: 0,inspection_year,boro,dba,grade,camis_count
0,2015,BRONX,DUNKIN' DONUTS,A,21
1,2015,BRONX,DUNKIN' DONUTS,C,4
2,2015,BRONX,"DUNKIN' DONUTS, BASKIN ROBBINS",A,2
3,2015,BRONX,GOLDEN KRUST CARIBBEAN BAKERY & GRILL,A,3
4,2015,BRONX,KENNEDY FRIED CHICKEN,A,10


In [None]:
def adjust_df(chain_list):
    boro = boro_group.copy()
    for i, chain_name in enumerate(chain_list):
        each_chain = boro[boro['dba'] == chain_name]

In [None]:
def update(attr, old, new):
    # Get the list of restaurants for the graph
    boros_to_plot = store_list.labels

    # Make a new dataset based on the selected carriers and the 
    # make_dataset function defined earlier
    new_src = make_dataset(carriers_to_plot,
                           range_start = -60,
                           range_end = 120,
                           bin_width = 5)
    
    # Convert dataframe to column data source
    new_src = ColumnDataSource(new_src)

    # Update the source used the quad glpyhs
    src.data.update(new_src.data)

In [41]:
for i, a in enumerate(boro_list):
    print(f'i:{i},a:{a}')

i:0,a:BRONX
i:1,a:BROOKLYN
i:2,a:MANHATTAN
i:3,a:QUEENS
i:4,a:STATEN ISLAND
