In [None]:
#Imports
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import folium
import timestring
import seaborn as sns
# Apply seaborn's "darkgrid" style to the plots drawn below.
sns.set(style="darkgrid")

In [None]:
#Optional: uncomment the next line to display all columns of wide DataFrames
#pd.set_option('display.max_columns', None)

In [None]:
# Load the noise-complaint CSV into a DataFrame (path is relative to the
# directory the notebook is run from).
noise_df = pd.read_csv("data/Noise_complaints_in_2016.csv")

In [None]:
# (rows, columns) of the data as loaded
noise_df.shape

In [None]:
# Preview the first five rows
noise_df.head(5)

# Data cleanup

In [None]:
# Number of missing values in each column
noise_df.isnull().sum()

In [None]:
# Columns that are not used anywhere in this analysis (intersection/street
# details, school, taxi, bridge/highway, ferry and resolution fields).
delete_columns = [
    "Intersection Street 1", "Intersection Street 2", "Address Type", "Landmark", "Facility Type",
    "Status", "X Coordinate (State Plane)", "Y Coordinate (State Plane)", "Park Facility Name",
    "Park Borough", "School Name", "School Number", "School Region", "School Code",
    "School Phone Number",
    "School Address", "School City", "School State", "School Zip", "School Not Found",
    "School or Citywide Complaint", "Vehicle Type", "Taxi Company Borough", "Taxi Pick Up Location",
    "Bridge Highway Name", "Bridge Highway Direction", "Road Ramp", "Bridge Highway Segment",
    "Garage Lot Name", "Ferry Direction", "Ferry Terminal Name", "Due Date",
    "Resolution Action Updated Date", "Incident Address", "Street Name", "Cross Street 1", "Cross Street 2",
    "Location Type", "Resolution Description",
]

# Reassign rather than mutate in place, and skip labels that are already gone,
# so re-running this cell does not raise a KeyError.
# Trade-off: a misspelled label in delete_columns is skipped silently.
noise_df = noise_df.drop(delete_columns, axis=1, errors="ignore")

In [None]:
# Shape (rows, columns) after dropping the unused columns
noise_df.shape

In [None]:
# Missing values per column after dropping the unused columns
noise_df.isnull().sum()

In [None]:
# Discard every row that lacks a coordinate, a complaint type or a ZIP code,
# then show the null counts that remain per column.
noise_df = noise_df.dropna(
    how='any',
    subset=['Latitude', "Longitude", "Complaint Type", "Incident Zip"],
)
noise_df.isnull().sum()

In [None]:
# Convert Incident Zip from float to str so it can be matched against the
# postal-code property of the GeoJSON features.
#   float -> int drops the trailing ".0"
#   int -> str gives the text form
#   zfill(5) restores leading zeros lost in the numeric round trip
#   (e.g. 83.0 -> "00083"); ZIP codes are five-character strings.
noise_df['Incident Zip'] = (
    noise_df['Incident Zip'].astype(int).astype(str).str.zfill(5)
)
noise_df['Incident Zip'].dtype

In [None]:
# Replace the headers with snake_case names. The assignment is positional,
# so the list has to match the frame's remaining columns one-to-one, in order.
noise_df.columns = [
    'unique_key',
    'created_date',
    'closed_date',
    'agency',
    'agency_name',
    'complaint_type',
    'descriptor',
    'incident_zip',
    'city',
    'community_board',
    'borough',
    'latitude',
    'longitude',
    'location',
]

noise_df.head(5)

# Data Analysis

In [None]:
# Number of complaints in each borough
borough_count = sns.countplot(data=noise_df, x="borough")

In [None]:
# Number of complaints for each value of the descriptor column
# (horizontal bars, one per descriptor)
complaint_count = sns.countplot(data=noise_df, y="descriptor")

# Choropleth Map

In [None]:
# Choropleth of all noise complaints per ZIP code.
zipcode_geo = r'data/ZIPNYC.geojson'

# Complaints per ZIP, reshaped into a two-column frame whose first column
# carries the key that is joined to the GeoJSON features.
zip_data = noise_df["incident_zip"].value_counts()
zip_count = zip_data.to_frame().reset_index()
zip_count.columns = ["postalCode", "count"]

# No explicit threshold scale is passed, so Folium determines the scale.
# NOTE: the name `map` shadows Python's builtin map() for the rest of the notebook.
# NOTE(review): Map.choropleth(geo_path=...) looks like the API of older
# folium releases — confirm against the installed version.
map = folium.Map(zoom_start=11, location=[40.7128, -74.0059])
map.choropleth(
    geo_path=zipcode_geo,
    data=zip_count,
    columns=["postalCode", "count"],
    key_on='feature.properties.postalCode',
    fill_color='BuPu',
    fill_opacity=0.7,
    line_opacity=0.2,
)

map.save('html/noise_by_zipcode.html')
map

# Loud music / Party map

In [None]:
# Complaints whose descriptor mentions "Party" (rows with a missing
# descriptor are excluded), and their distribution over the boroughs.
party_df = noise_df.loc[noise_df['descriptor'].str.contains("Party", na=False)]
borough_count = sns.countplot(data=party_df, x="borough")

In [None]:
# NOTE(review): bokeh.charts is only available in older bokeh releases —
# confirm the pinned version before re-running.
from bokeh.charts import Histogram, Bar, Scatter
from bokeh.io import output_notebook, push_notebook, show
output_notebook()

# List of months by number, one entry per party complaint.
# created_date is read by name instead of through a positional tuple index,
# so the result does not depend on where the column sits in the frame.
months = [timestring.parse(created)['month'] for created in party_df['created_date']]

# Define the plot
plot = Histogram(months, xlabel='Months', ylabel='Frequency', title='Loud Music/Party Complaints by Month in 2016')
handle = show(plot, notebook_handle=True)  # Show the figure.

In [None]:
# Choropleth of party-related complaints per ZIP code.
zip_data2 = party_df["incident_zip"].value_counts()
zip_count2 = zip_data2.to_frame().reset_index()
zip_count2.columns = ["postalCode", "count"]

map2 = folium.Map(zoom_start=10, location=[40.7128, -74.0059])
map2.choropleth(
    geo_path=zipcode_geo,
    data=zip_count2,
    columns=["postalCode", "count"],
    key_on='feature.properties.postalCode',
    fill_color='OrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
)

map2.save('html/noise_by_zipcode_party.html')
map2

# Barking dogs

In [None]:
# Complaints whose descriptor mentions "Dog" (rows with a missing descriptor
# are excluded), and their distribution over the boroughs.
dog_df = noise_df.loc[noise_df['descriptor'].str.contains("Dog", na=False)]
borough_count = sns.countplot(data=dog_df, x="borough")

In [None]:
# Choropleth of dog-related complaints per ZIP code.
# NOTE: zip_data2, zip_count2 and map2 are the same names the party-map cell
# uses, so running this cell overwrites those objects.
zip_data2 = dog_df["incident_zip"].value_counts()
zip_count2 = zip_data2.to_frame().reset_index()
zip_count2.columns = ["postalCode", "count"]

map2 = folium.Map(zoom_start=10, location=[40.7128, -74.0059])
map2.choropleth(
    geo_path=zipcode_geo,
    data=zip_count2,
    columns=["postalCode", "count"],
    key_on='feature.properties.postalCode',
    fill_color='OrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
)

map2.save('html/noise_by_zipcode_dog.html')
map2

# Ice cream trucks

In [None]:
# Complaints whose descriptor mentions "Ice Cream Truck" (rows with a missing
# descriptor are excluded), counted per ZIP code and drawn as a choropleth.
ice_cream_df = noise_df.loc[noise_df['descriptor'].str.contains("Ice Cream Truck", na=False)]
zip_data3 = ice_cream_df["incident_zip"].value_counts()
zip_count3 = zip_data3.to_frame().reset_index()
zip_count3.columns = ["postalCode", "count"]

map3 = folium.Map(zoom_start=10, location=[40.7128, -74.0059])
map3.choropleth(
    geo_path=zipcode_geo,
    data=zip_count3,
    columns=["postalCode", "count"],
    key_on='feature.properties.postalCode',
    fill_color='OrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
)

map3.save('html/noise_by_zipcode_ice_cream.html')
map3

# Point map

In [None]:
#from folium import plugins

#marker_cluster = folium.MarkerCluster().add_to(map)
#for name, row in noise_df.iterrows():
    ##folium.Marker([row["latitude"], row["longitude"]], popup="{0}: {1}".format(row["incident_zip"], row["complaint_type"])).add_to(marker_cluster)
    #folium.Marker([row["latitude"], row["longitude"]]).add_to(marker_cluster)
    
#map.save('noise_point_map.html')

In [None]:
#map