# NYC Chain Restaurant Inspection Results

In [27]:
import pandas as pd
from sodapy import Socrata
from config import API_Key, username, password
import requests
import json
import matplotlib.pyplot as plt
import re, datetime

from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from bokeh.io import show, output_notebook, output_file
from bokeh.transform import dodge
from bokeh.core.properties import value
from bokeh.palettes import Spectral6

In [2]:
# Configure Bokeh so that plots passed to show() are rendered inline in the notebook.
output_notebook()

## 1. Loading Data from API

In [3]:
# Socrata dataset id queried on data.cityofnewyork.us, and the maximum number of
# rows to request (381,912 matches the row count reported by data_df.info()).
DATASET_ID = "43nn-pn8j"
ROW_LIMIT = 381912

# Authenticated client (needed for non-public datasets); the app token and
# credentials are imported from the local config module.
client = Socrata("data.cityofnewyork.us", API_Key, username=username, password=password)

# Returned as JSON from API by sodapy.
data = client.get(DATASET_ID, limit=ROW_LIMIT)

# Convert to pandas DataFrame
data_df = pd.DataFrame.from_records(data)

In [4]:
# Inspect column names, dtypes and non-null counts of the freshly loaded frame.
data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 381912 entries, 0 to 381911
Data columns (total 18 columns):
action                   381486 non-null object
boro                     381912 non-null object
building                 381691 non-null object
camis                    381912 non-null object
critical_flag            381912 non-null object
cuisine_description      381912 non-null object
dba                      381806 non-null object
grade                    192394 non-null object
grade_date               190780 non-null object
inspection_date          381912 non-null object
inspection_type          381486 non-null object
phone                    381894 non-null object
record_date              381912 non-null object
score                    364787 non-null object
street                   381912 non-null object
violation_code           377030 non-null object
violation_description    374249 non-null object
zipcode                  376412 non-null object
dtypes: object(18)
memory

In [5]:
# Normalise chain-name spelling variants in the restaurant-name column only;
# a frame-wide replace would also rewrite any other column holding the same text.
# The inspection year is parsed in one vectorised pass instead of calling
# strptime once per row, and kept as int64.
data_df = data_df.assign(
    dba=data_df['dba'].replace({"PAPA JOHN'S PIZZA": "PAPA JOHN'S",
                                "DUNKIN DONUTS": "DUNKIN' DONUTS"}),
    inspection_year=pd.to_datetime(
        data_df['inspection_date'], format='%Y-%m-%dT%H:%M:%S.%f'
    ).dt.year.astype('int64'),
)

In [6]:
# Distinct inspection years present in the data.
# NOTE(review): 1900 comes from rows dated 1900-01-01 — presumably a placeholder for
# establishments that have not been inspected yet; confirm against the dataset docs.
data_df['inspection_year'].unique()

array([2018, 2017, 2016, 2019, 2015, 2014, 2013, 2012, 2011, 1900],
      dtype=int64)

In [105]:
# Preview the first two rows, including the derived inspection_year column.
data_df.head(2)

Unnamed: 0,action,boro,building,camis,critical_flag,cuisine_description,dba,grade,grade_date,inspection_date,inspection_type,phone,record_date,score,street,violation_code,violation_description,zipcode,inspection_year
0,Violations were cited in the following area(s).,BRONX,1007,30075445,Not Critical,Bakery,MORRIS PARK BAKE SHOP,A,2018-05-11T00:00:00.000,2018-05-11T00:00:00.000,Cycle Inspection / Initial Inspection,7188924968,2019-05-11T06:08:52.000,5,MORRIS PARK AVE,08C,Pesticide use not in accordance with label or ...,10462,2018
1,Violations were cited in the following area(s).,BRONX,1007,30075445,Not Critical,Bakery,MORRIS PARK BAKE SHOP,A,2018-05-11T00:00:00.000,2018-05-11T00:00:00.000,Cycle Inspection / Initial Inspection,7188924968,2019-05-11T06:08:52.000,5,MORRIS PARK AVE,10F,Non-food contact surface improperly constructe...,10462,2018


## 2. Chain Restaurants by Year and Boroughs
- Define a helper function `make_df` that builds the aggregated count DataFrames used in the analysis
- Group all non-chain restaurants into groups

In [7]:
# Make a copy of data_df to receive the data manipulation,
# leaving the original data_df untouched so it can be reused.

stores = data_df.copy()

In [114]:
def make_df(data, value_col, index_col=['boro','dba']):
    """Count non-null values of ``value_col`` per group and save the result as CSV.

    Parameters
    ----------
    data : pandas.DataFrame
        Dataset to aggregate.
    value_col : str or list of str
        Column(s) to be aggregated. Each one appears in the result as
        ``<name>_count``.
    index_col : str or list of str, default ['boro', 'dba']
        Column(s) to be grouped by.

    Returns
    -------
    pandas.DataFrame
        One row per group: the grouping columns followed by the count columns.

    Notes
    -----
    The result is also written to ``<first grouping column>_tidy.csv`` in the
    current working directory, so two calls that share the same first grouping
    column overwrite each other's file.
    """
    # Accept a bare column name as well as a list. Iterating a plain string would
    # walk its characters, which silently skips the rename and names the CSV
    # after the first letter of the column.
    value_cols = [value_col] if isinstance(value_col, str) else list(value_col)
    index_cols = [index_col] if isinstance(index_col, str) else list(index_col)

    # count() only counts non-null entries of each value column.
    counts = data.groupby(index_cols)[value_cols].count().reset_index()
    counts = counts.rename(columns={col: f'{col}_count' for col in value_cols})
    counts.to_csv(f'{index_cols[0]}_tidy.csv')
    return counts

In [107]:
# Count records per restaurant name for every inspection date / year.
# make_df also writes the table to inspection_date_tidy.csv.
store_num = make_df(
    data=data_df,
    value_col=['camis'],
    index_col=['inspection_date', 'inspection_year', 'dba'],
)
store_num.head()

Unnamed: 0,inspection_date,inspection_year,dba,camis_count
0,1900-01-01T00:00:00.000,1900,18TH WARD BREWPUB,1
1,1900-01-01T00:00:00.000,1900,1942 LOUNGE,1
2,1900-01-01T00:00:00.000,1900,207 BAKERY & COFFEE SHOP,1
3,1900-01-01T00:00:00.000,1900,23 WINEHOUSE ITALIAN BISTR0,1
4,1900-01-01T00:00:00.000,1900,7 TERIYAKI SUSHI,1


In [110]:
# Count records per borough, inspection year, restaurant name and grade.
# make_df also writes the table to boro_tidy.csv.
boro_store = make_df(
    data=data_df,
    value_col=['camis'],
    index_col=['boro', 'inspection_year', 'dba', 'grade'],
)
boro_store.head()

Unnamed: 0,boro,inspection_year,dba,grade,camis_count
0,BRONX,2014,STAN'S SPORTS BAR,A,3
1,BRONX,2015,1617-A NATIONAL BAKERY,A,2
2,BRONX,2015,1617-A NATIONAL BAKERY,C,2
3,BRONX,2015,224TH CORNER RESTAURANT & BAKERY,A,2
4,BRONX,2015,7 SPICES,A,2
