In [1]:
from pandas import Series, DataFrame
import pandas as pd
import numpy as np
import dateutil
import datetime
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd

In [2]:
# Load Data, parse Crash Date info
data = pd.read_csv("Motor_Vehicle_Collisions_Crashes_NYPD.csv", low_memory = False)
data['CRASH DATE'] = data['CRASH DATE'].apply(dateutil.parser.parse, dayfirst=True)

In [3]:
# Extract year, month info
data['year'] = pd.DatetimeIndex(data['CRASH DATE']).year
data['month'] = pd.DatetimeIndex(data['CRASH DATE']).month
data['season'] = (data['month']%12 + 3)//3
seasons = {1: '1 Winter', 2: '2 Spring', 3: '3 Summer', 4: '4 Autumn'}
data['season_name'] = data['season'].map(seasons)
data.head()

Unnamed: 0,CRASH DATE,CRASH TIME,BOROUGH,ZIP CODE,LATITUDE,LONGITUDE,LOCATION,ON STREET NAME,CROSS STREET NAME,OFF STREET NAME,...,COLLISION_ID,VEHICLE TYPE CODE 1,VEHICLE TYPE CODE 2,VEHICLE TYPE CODE 3,VEHICLE TYPE CODE 4,VEHICLE TYPE CODE 5,year,month,season,season_name
0,2021-02-02,0:00,,,,,,NEW ENGLAND THRUWAY,,,...,4388602,Sedan,,,,,2021,2,1,1 Winter
1,2021-01-02,0:15,,,,,,QUEENSBORO BRIDGE UPPER,,,...,4388510,Sedan,Sedan,,,,2021,1,1,1 Winter
2,2021-01-02,15:30,,,40.697815,-73.7602,"(40.697815, -73.7602)",114 ROAD,,,...,4388639,Sedan,,,,,2021,1,1,1 Winter
3,2021-01-02,2:35,,,,,,PROSPECT EXPRESSWAY RAMP,,,...,4388200,Station Wagon/Sport Utility Vehicle,,,,,2021,1,1,1 Winter
4,2020-10-27,9:00,,,,,,BRUCKNER EXPRESSWAY,,,...,4361834,commercial,Sedan,,,,2020,10,4,4 Autumn


In [4]:
data.shape

(1752514, 33)

In [5]:
df = data.dropna(subset=['LATITUDE', 'LONGITUDE'])
df.head()

Unnamed: 0,CRASH DATE,CRASH TIME,BOROUGH,ZIP CODE,LATITUDE,LONGITUDE,LOCATION,ON STREET NAME,CROSS STREET NAME,OFF STREET NAME,...,COLLISION_ID,VEHICLE TYPE CODE 1,VEHICLE TYPE CODE 2,VEHICLE TYPE CODE 3,VEHICLE TYPE CODE 4,VEHICLE TYPE CODE 5,year,month,season,season_name
2,2021-01-02,15:30,,,40.697815,-73.7602,"(40.697815, -73.7602)",114 ROAD,,,...,4388639,Sedan,,,,,2021,1,1,1 Winter
18,2021-01-26,22:12,,,40.724667,-73.82076,"(40.724667, -73.82076)",73 AVENUE,MAIN STREET,,...,4387384,,,,,,2021,1,1,1 Winter
29,2021-01-19,14:40,,,40.866447,-73.9305,"(40.866447, -73.9305)",RIVERSIDE DRIVE,,,...,4385331,Sedan,,,,,2021,1,1,1 Winter
31,2021-01-19,8:22,MANHATTAN,10000.0,40.774708,-73.97469,"(40.774708, -73.97469)",WEST DRIVE,OLMSTED WAY,,...,4385270,Bike,,,,,2021,1,1,1 Winter
32,2021-01-19,9:20,QUEENS,11354.0,40.761047,-73.83518,"(40.761047, -73.83518)",COLLEGE POINT BOULEVARD,36 ROAD,,...,4385175,Sedan,Station Wagon/Sport Utility Vehicle,,,,2021,1,1,1 Winter


In [6]:
df.shape

(1544467, 33)

In [7]:
import dill

In [8]:
dill.dump(df, open('df.pkd', 'wb'))

In [9]:
df = dill.load(open('df.pkd', 'rb'))

In [10]:
from IPython.display import IFrame
import ipyleaflet
from ipyleaflet import basemaps, GeoData, Map, Marker, LayersControl, ZoomControl, WidgetControl
from ipyleaflet import CircleMarker
from ipyleaflet import MarkerCluster

In [11]:
from ipywidgets import widgets
from IPython.display import display
from ipywidgets import interact, Dropdown

# Interactive Dropdown Menu to select collision data from specific month, year
year = Dropdown(options = list(range(2012,2022)))
month = Dropdown(options =list(range(1,13)))
output = widgets.Output()

def common_filtering(year, month):
    output.clear_output()
    global output_dataframe
    temp = df[(df['year'] == year) & (df['month'] == month)]
    df_filtered = temp[['LATITUDE', 'LONGITUDE']]
    
    with output:
        output_dataframe = df_filtered
        display(output_dataframe)

def dropdown_year(year):
    common_filtering(year.new, month.value)

def dropdown_month(month):
    common_filtering(year.value, month.new)

year.observe(dropdown_year, names='value')
month.observe(dropdown_month, names='value')

display(year)
display(month)
display(output)

Dropdown(options=(2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021), value=2012)

Dropdown(options=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), value=1)

Output()

In [12]:
map2 = Map(center=(40.7, -73.9), zoom = 10, basemap= basemaps.Esri.WorldStreetMap)

markers = []
for lon, lat in output_dataframe[["LONGITUDE", "LATITUDE"]].values:
    circle = CircleMarker(location=(lat, lon), color="black", fill_color="black", weight=3)
    markers.append(circle)
marker_cluster = MarkerCluster(markers=markers)
map2.add_layer(marker_cluster)
map2

Map(center=[40.7, -73.9], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_ou…