# MAT 328 - Final Project
### Tony Lu

---

This project uses the [Motor Vehicle Collision](https://data.cityofnewyork.us/Public-Safety/Motor-Vehicle-Collisions-Crashes/h9gi-nx95) dataset offered by [NYC Open Data](http://opendata.cityofnewyork.us/).

All data and values are dated March 2021 - the most recent date this notebook was executed. Executing this notebook at a later date may yield different results compared to what is already rendered in this notebook.

# Project Setup

### Import the necessary libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline
sns.set_theme(style='darkgrid')

### Download and load the data

In [2]:
# This might take awhile. It has 1.7 million rows after all.
raw_data = pd.read_csv('https://data.cityofnewyork.us/api/views/h9gi-nx95/rows.csv?accessType=DOWNLOAD')

  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
df = raw_data.copy()

### Show the data

In [4]:
df.head()

Unnamed: 0,CRASH DATE,CRASH TIME,BOROUGH,ZIP CODE,LATITUDE,LONGITUDE,LOCATION,ON STREET NAME,CROSS STREET NAME,OFF STREET NAME,NUMBER OF PERSONS INJURED,NUMBER OF PERSONS KILLED,NUMBER OF PEDESTRIANS INJURED,NUMBER OF PEDESTRIANS KILLED,NUMBER OF CYCLIST INJURED,NUMBER OF CYCLIST KILLED,NUMBER OF MOTORIST INJURED,NUMBER OF MOTORIST KILLED,CONTRIBUTING FACTOR VEHICLE 1,CONTRIBUTING FACTOR VEHICLE 2,CONTRIBUTING FACTOR VEHICLE 3,CONTRIBUTING FACTOR VEHICLE 4,CONTRIBUTING FACTOR VEHICLE 5,COLLISION_ID,VEHICLE TYPE CODE 1,VEHICLE TYPE CODE 2,VEHICLE TYPE CODE 3,VEHICLE TYPE CODE 4,VEHICLE TYPE CODE 5
0,12/04/2020,21:50,,,40.606346,-73.95273,"(40.606346, -73.95273)",OCEAN AVENUE,,,0.0,0.0,0,0,0,0,0,0,Driver Inattention/Distraction,Unspecified,,,,4373194,Sedan,Station Wagon/Sport Utility Vehicle,,,
1,12/12/2020,21:20,QUEENS,11355.0,40.756355,-73.82578,"(40.756355, -73.82578)",KISSENA BOULEVARD,MAPLE AVENUE,,1.0,0.0,1,0,0,0,0,0,Backing Unsafely,,,,,4375637,Sedan,,,,
2,11/08/2020,7:50,,,40.751457,-73.978165,"(40.751457, -73.978165)",EAST 41 STREET,,,0.0,0.0,0,0,0,0,0,0,Driver Inattention/Distraction,Unspecified,Unspecified,Unspecified,,4365601,Station Wagon/Sport Utility Vehicle,Sedan,Station Wagon/Sport Utility Vehicle,,
3,12/18/2020,8:00,,,40.598392,-73.90629,"(40.598392, -73.90629)",BELT PARKWAY,,,2.0,0.0,0,0,0,0,2,0,Driver Inattention/Distraction,Unspecified,,,,4378372,Station Wagon/Sport Utility Vehicle,Sedan,,,
4,11/12/2020,2:06,,,40.78739,-73.938225,"(40.78739, -73.938225)",FDR DRIVE,,,0.0,0.0,0,0,0,0,0,0,Unspecified,,,,,4367158,Station Wagon/Sport Utility Vehicle,,,,


In [5]:
# Show size of the data
print(f'Rows: {df.shape[0]} | Columns: {df.shape[1]}')

Rows: 1759301 | Columns: 29


In [6]:
df.describe(include='all')

Unnamed: 0,CRASH DATE,CRASH TIME,BOROUGH,ZIP CODE,LATITUDE,LONGITUDE,LOCATION,ON STREET NAME,CROSS STREET NAME,OFF STREET NAME,NUMBER OF PERSONS INJURED,NUMBER OF PERSONS KILLED,NUMBER OF PEDESTRIANS INJURED,NUMBER OF PEDESTRIANS KILLED,NUMBER OF CYCLIST INJURED,NUMBER OF CYCLIST KILLED,NUMBER OF MOTORIST INJURED,NUMBER OF MOTORIST KILLED,CONTRIBUTING FACTOR VEHICLE 1,CONTRIBUTING FACTOR VEHICLE 2,CONTRIBUTING FACTOR VEHICLE 3,CONTRIBUTING FACTOR VEHICLE 4,CONTRIBUTING FACTOR VEHICLE 5,COLLISION_ID,VEHICLE TYPE CODE 1,VEHICLE TYPE CODE 2,VEHICLE TYPE CODE 3,VEHICLE TYPE CODE 4,VEHICLE TYPE CODE 5
count,1759301,1759301,1219060,1218849.0,1550757.0,1550757.0,1550757,1404929,1141270,262004,1759283.0,1759270.0,1759301.0,1759301.0,1759301.0,1759301.0,1759301.0,1759301.0,1754358,1510517,117754,25308,6605,1759301.0,1750055,1468751,114569,24597,6431
unique,3166,1440,5,445.0,,,228122,12095,18866,160448,,,,,,,,,61,61,50,39,29,,1196,1290,182,74,53
top,01/21/2014,16:00,BROOKLYN,11207.0,,,"(0.0, 0.0)",BROADWAY,3 AVENUE,772 EDGEWATER ROAD,,,,,,,,,Unspecified,Unspecified,Unspecified,Unspecified,Unspecified,,Sedan,PASSENGER VEHICLE,Sedan,Sedan,Sedan
freq,1161,25440,381973,15341.0,,,1280,17194,9843,402,,,,,,,,,630275,1269988,109574,23857,6220,,429277,318607,30530,7025,1899
mean,,,,,40.69055,-73.86784,,,,,0.2726406,0.001255634,0.05146533,0.0006604896,0.02280679,9.549247e-05,0.1982037,0.0004984934,,,,,,2909089.0,,,,,
std,,,,,1.172197,2.375297,,,,,0.6679087,0.0374716,0.2333785,0.02623879,0.1508778,0.009829559,0.6296716,0.02441334,,,,,,1502986.0,,,,,
min,,,,,0.0,-201.36,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,22.0,,,,,
25%,,,,,40.66862,-73.97605,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,2865669.0,,,,,
50%,,,,,40.72218,-73.92844,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,3515808.0,,,,,
75%,,,,,40.76881,-73.86667,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,3955871.0,,,,,
