In [None]:
# Libraries used throughout the notebook, followed by one-time display setup.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import timestring  # used below to parse the complaint creation timestamps
#from bokeh.plotting import figure
from bokeh.charts import Histogram, Bar, Scatter
from bokeh.io import output_notebook, push_notebook, show

import seaborn as sns
sns.set(style="darkgrid")  # apply seaborn's "darkgrid" style to subsequent charts
import folium  # used below to build the zip-code map

# Render matplotlib figures inline in the notebook.
%matplotlib inline
output_notebook()  # direct bokeh output to the notebook

In [None]:
# Load the complaints export. A header on the first row, a comma separator and
# double-quote quoting are what pandas uses by default, so they are not spelled out.
complaints_csv = 'data/Graffiti_in_NYC_since_Oct_2013.csv'
df = pd.read_csv(complaints_csv)
df.head()

# Data cleanup

In [None]:
# Columns removed before the analysis (intersection, school, taxi, bridge,
# ferry, park, address and resolution fields, among others).
delete_columns = [
    "Intersection Street 1", "Intersection Street 2", "Address Type",
    "Landmark", "Facility Type", "Status",
    "X Coordinate (State Plane)", "Y Coordinate (State Plane)",
    "Park Facility Name", "Park Borough",
    "School Name", "School Number", "School Region", "School Code",
    "School Phone Number", "School Address", "School City", "School State",
    "School Zip", "School Not Found", "School or Citywide Complaint",
    "Vehicle Type", "Taxi Company Borough", "Taxi Pick Up Location",
    "Bridge Highway Name", "Bridge Highway Direction", "Road Ramp",
    "Bridge Highway Segment", "Garage Lot Name",
    "Ferry Direction", "Ferry Terminal Name",
    "Due Date", "Resolution Action Updated Date",
    "Incident Address", "Street Name", "Cross Street 1", "Cross Street 2",
    "Location Type", "Resolution Description",
]

# Remove them from the frame in place, then show the resulting (rows, columns).
df.drop(delete_columns, axis='columns', inplace=True)
df.shape

In [None]:
# Keep only rows where none of these columns is missing, then display the
# remaining null count for every column.
required_columns = ['Latitude', "Longitude", "Complaint Type", "Incident Zip"]
df = df.dropna(how='any', subset=required_columns)
df.isnull().sum()

In [None]:
# Convert the zip code column to strings so it can be mapped with the GeoJSON.
# Going through int first means the text carries no decimal part.
df['Incident Zip'] = df['Incident Zip'].astype(int).astype(str)
df['Incident Zip'].dtype

In [None]:
# Replace the column labels with snake_case names. The assignment is
# positional: one label per remaining column, in the frame's column order.
snake_case_names = [
    'unique_key',
    'created_date',
    'closed_date',
    'agency',
    'agency_name',
    'complaint_type',
    'descriptor',
    'incident_zip',
    'city',
    'community_board',
    'borough',
    'latitude',
    'longitude',
    'location',
]
df.columns = snake_case_names
df.head()

# Data analysis

In [None]:
# Bar chart with one bar per borough, counting the rows in each.
borough_count = sns.countplot(data=df, x="borough")

In [None]:
# Month number of every complaint, parsed from its creation timestamp.
created_stamps = df['created_date']
months = [timestring.parse(stamp)['month'] for stamp in created_stamps]

# Histogram of the month numbers.
# NOTE(review): the title says 2013, but the input file is named "since Oct 2013" -
# confirm whether the data spans several years before relying on this label.
plot = Histogram(
    months,
    xlabel='Months',
    ylabel='Frequency',
    title='Graffiti Complaints by Month in 2013',
)
# Display the figure; the returned notebook handle is kept in `handle`.
handle = show(plot, notebook_handle=True)

# Choropleth map

In [None]:
# Choropleth: shade each zip code area by its number of complaints.
zipcode_geo = r'data/ZIPNYC.geojson'

# Complaints per zip code as a two-column table (zip code, count).
zip_data = df["incident_zip"].value_counts()
zip_count = zip_data.to_frame().reset_index()
zip_count.columns = ["postalCode", "count"]

# Base map centred on the given coordinates; no threshold scale is passed, so
# folium determines the colour scale itself.
map_center = [40.7128, -74.0059]
map = folium.Map(location=map_center, zoom_start=11)
map.choropleth(
    geo_path=zipcode_geo,
    data=zip_count,
    columns=["postalCode", "count"],
    key_on='feature.properties.postalCode',  # GeoJSON property matched against "postalCode"
    fill_color='BuPu',
    fill_opacity=0.7,
    line_opacity=0.2,
)

# Write the map to disk and display it as the cell output.
map.save('html/graffiti_by_zipcode.html')
map

# Adding Banksy graffiti to the map

In [None]:
# Spot coordinates, one [longitude, latitude] row per location.
NYC_spots = np.array([
    [-73.992104, 40.715202],
    [-73.99231, 40.743557],
    [-73.939957, 40.708389],
    [-74.011681, 40.678394],
    [-73.951019, 40.735023],
    [-73.892136, 40.672375],
    [-73.896439, 40.746998],
    [-74.009491, 40.718002],
    [-73.942383, 40.702347],
    [-74.1502, 40.579533],
    [-73.979899, 40.783811],
    [-73.914996, 40.818608],
    [-73.842684, 40.759781],
    [-73.995913, 40.767355],
    [-74.016368, 40.651752],
    [-73.959006, 40.728283],
    [-73.981946, 40.579219],
    [-73.926355, 40.829606],
    [-73.933027, 40.737096],
])

# Column 0 holds the longitudes, column 1 the latitudes.
spots_lon, spots_lat = NYC_spots[:, 0], NYC_spots[:, 1]

In [None]:
from folium import plugins

# Add one marker per spot to the existing map; the popup shows "lat, lon".
for lat, lon in zip(spots_lat, spots_lon):
    marker = folium.Marker(location=[lat, lon], popup="{0}, {1}".format(lat, lon))
    marker.add_to(map)

# Save the map with markers to its own file and display it as the cell output.
map.save('html/graffiti_by_zipcode_w_Banksy.html')
map