# NYC Inspections Data Visualization with GeoJSON and Folium

In [1]:
import pandas as pd
from sodapy import Socrata
from config import API_Key, username, password
import numpy as np
import requests
import json
import matplotlib.pyplot as plt
import re, datetime
import os
import folium
import json
print(folium.__version__)

0.8.3


## 1. Load and clean data from API

In [2]:
# Example authenticated client (needed for non-public datasets):
client = Socrata("data.cityofnewyork.us", API_Key, username, password)

# Returned as JSON from API by sodapy.
data = client.get("43nn-pn8j",limit=381912)

In [3]:
# Convert to dataframe
data_df = pd.DataFrame.from_records(data)
data_df.score = data_df.score.astype(float)
data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 381912 entries, 0 to 381911
Data columns (total 18 columns):
action                   381230 non-null object
boro                     381912 non-null object
building                 381691 non-null object
camis                    381912 non-null object
critical_flag            381912 non-null object
cuisine_description      381912 non-null object
dba                      381701 non-null object
grade                    192655 non-null object
grade_date               190370 non-null object
inspection_date          381912 non-null object
inspection_type          381230 non-null object
phone                    381903 non-null object
record_date              381912 non-null object
score                    364453 non-null float64
street                   381912 non-null object
violation_code           376768 non-null object
violation_description    374047 non-null object
zipcode                  376396 non-null object
dtypes: float64(1), obje

In [4]:
# Group data by restaurant id and violation code 
violation = data_df.groupby(['camis','violation_code'])['violation_code'].count()
violation = pd.DataFrame(violation)
violation = violation.rename(columns={'violation_code':'violation_counts'})

# Unstack and transpose the df to record total count of all violations
violation = violation['violation_counts'].unstack(0).T
violation.fillna(0, inplace=True)
violation.reset_index(inplace=True)
violation.head()

violation_code,camis,02A,02B,02C,02D,02E,02F,02G,02H,02I,...,20B,20D,20E,20F,22A,22B,22C,22E,22F,22G
0,30075445,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,30112340,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,30191841,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,40356018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,40356151,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [5]:
data_df.columns

Index(['action', 'boro', 'building', 'camis', 'critical_flag',
       'cuisine_description', 'dba', 'grade', 'grade_date', 'inspection_date',
       'inspection_type', 'phone', 'record_date', 'score', 'street',
       'violation_code', 'violation_description', 'zipcode'],
      dtype='object')

In [6]:
inspection = data_df.copy()

In [7]:
merged = pd.merge(inspection, violation, on='camis', how='left')
merged.head()

Unnamed: 0,action,boro,building,camis,critical_flag,cuisine_description,dba,grade,grade_date,inspection_date,...,20B,20D,20E,20F,22A,22B,22C,22E,22F,22G
0,Violations were cited in the following area(s).,BRONX,1007,30075445,Not Critical,Bakery,MORRIS PARK BAKE SHOP,A,2018-05-11T00:00:00.000,2018-05-11T00:00:00.000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Violations were cited in the following area(s).,BRONX,1007,30075445,Not Critical,Bakery,MORRIS PARK BAKE SHOP,A,2018-05-11T00:00:00.000,2018-05-11T00:00:00.000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Violations were cited in the following area(s).,BRONX,1007,30075445,Critical,Bakery,MORRIS PARK BAKE SHOP,A,2017-05-18T00:00:00.000,2017-05-18T00:00:00.000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Violations were cited in the following area(s).,BRONX,1007,30075445,Not Critical,Bakery,MORRIS PARK BAKE SHOP,A,2017-05-18T00:00:00.000,2017-05-18T00:00:00.000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Violations were cited in the following area(s).,BRONX,1007,30075445,Critical,Bakery,MORRIS PARK BAKE SHOP,A,2016-02-18T00:00:00.000,2016-02-18T00:00:00.000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## 2. Set up a base map
- Pass starting coordinates to Folium [lat,lon]
- tiles = `OpenStreetMap` by default, but we can also try `Stamen Terrain`,`Stamen Toner`, `Mapbox Bright`, and `Mapbox Control Room`
- Save map in a file `m.save('index.html')`

In [8]:
# Set up a base map
m_test = folium.Map(location=[40.730610, -73.935242],
               tiles='Stamen Terrain')

# Enable lat/lng popovers 
m_test.add_child(folium.LatLngPopup())

# Show map
m_test

## 3. Find GeoJSON and load GeoJSON
- Source for GeoJSON of NYC: <br> http://data.beta.nyc/dataset/nyc-zip-code-tabulation-areas/resource/6df127b1-6d04-4bb7-b983-07402a2c3f90?view_id=b34c6552-9fdb-4f95-8810-0588ad1a4cc8
- GeoJSON is needed to represent the boundaries of each zip code in NYC

In [9]:
# Load GeoJSON
with open('nyc-zipcode.geojson','r') as jsonFile:
    geo_data = json.load(jsonFile)
geojson_zip = geo_data

geojson_zip['features'][1]

{'type': 'Feature',
 'properties': {'OBJECTID': 2,
  'postalCode': '11004',
  'PO_NAME': 'Glen Oaks',
  'STATE': 'NY',
  'borough': 'Queens',
  'ST_FIPS': '36',
  'CTY_FIPS': '081',
  'BLDGpostal': 0,
  '@id': 'http://nyc.pediacities.com/Resource/PostalCode/11004',
  'longitude': -73.711608312,
  'latitude': 40.745365835},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-73.71132911125308, 40.74947450816085],
    [-73.7106837407401, 40.75004039450918],
    [-73.70869392379949, 40.74875836557177],
    [-73.70713084684176, 40.749582740537946],
    [-73.70419967936363, 40.750169552569766],
    [-73.70218528698231, 40.744741887753854],
    [-73.70308721830736, 40.744547257355144],
    [-73.70098278625547, 40.73889569923034],
    [-73.71371033246908, 40.736174704865455],
    [-73.714741836008, 40.73782936804115],
    [-73.71567895968894, 40.73962914915626],
    [-73.71867075939582, 40.74631499057428],
    [-73.71966142935703, 40.748945236431474],
    [-73.72082889049311, 40.75117302817

## 4. Prepare dataframes for folium maps

In [10]:
zipcode_group = merged.groupby('zipcode')
score_map = zipcode_group.agg({'score': np.mean, 'camis': pd.Series.nunique})
score_map.rename(columns = {'score':'avg_score','camis':'total_restaurants'}, inplace = True)
score_map.reset_index(inplace=True)
score_map.head()
#geo_agg

Unnamed: 0,zipcode,avg_score,total_restaurants
0,10000,16.666667,2
1,10001,19.503149,475
2,10002,21.960769,501
3,10003,20.042978,665
4,10004,18.047052,139


In [11]:
# Remove zip code not in our df
geozips = []
for i in range(len(geojson_zip['features'])):
    if geojson_zip['features'][i]['properties']['postalCode'] in list(score_map['zipcode'].unique()):
        geozips.append(geojson_zip['features'][i])

# Create new json object
clean_json = dict.fromkeys(['type','features'])
clean_json['type'] = 'Feature'
clean_json['features'] = geozips

# Save clean json as clean-json
open("clean-json.json", "w").write(
    json.dumps(clean_json, sort_keys=True, indent=4, separators=(',',': ')))

1876106

## 5. Set up NYC folium map by Zip Codes

In [12]:
nyc_m = folium.Map(location=[40.730610, -73.935242],
                   tiles='Stamen Terrain')
nyc_m.add_child(folium.LatLngPopup())
folium.GeoJson(geojson_zip, name = 'geojson').add_to(nyc_m)
nyc_m

## 6. Choropleth map with folium

In [16]:
# table = main table/data frame we read from (pandas DataFrame)
# zips = column name where ZIP codes are (string)
# mapped_feature = column name for feature we want to visualize (string)
# add_text = any additional commentary to be added in the map legend (string)

def create_map(table, zips, mapped_feature, color = 'RdYlGn', add_text = ''):
    nyc_geo = r'clean-json.json'
    m = folium.Map(location = [40.730610, -73.935242],
                   tiles='Mapbox Bright')
    folium.Choropleth(
        geo_data = nyc_geo, data = table,
        key_on = 'feature.properties.postalCode',
        columns = [zips, mapped_feature],
        fill_color = color, fill_opacity = 0.7, line_opacity = 0.2,
        legend_name = (' ').join(mapped_feature.split('_')).title() + ' ' + add_text + ' Across NYC'
    ).add_to(m)
    m.add_child(folium.LatLngPopup())
    folium.LayerControl().add_to(m)
    m.save(outfile = mapped_feature + '_map.html')
    return m
    

In [17]:
create_map(score_map, 'zipcode', 'avg_score', add_text='of Restaurants')

In [18]:
create_map(score_map, 'zipcode', 'total_restaurants', 'BuPu')