<a href="https://colab.research.google.com/github/victorialarrazolo/USaccidentsanaysis/blob/main/USaccidentsanalysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Analysis of Accidents in the United States


In [17]:
# Import the analysis libraries (numpy, pandas, matplotlib.pyplot, seaborn) and configure display and plot options
from datetime import date

import matplotlib.pyplot as plt
import numpy as np
import opendatasets as od
import pandas as pd
import seaborn as sns
%matplotlib inline
# Let pandas display up to 200 rows and 200 columns before truncating output
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', 200)
# Use matplotlib's built-in 'ggplot' style sheet for every figure in the notebook
plt.style.use('ggplot')

In [18]:
# Shared constants: states, severities, plot styling, ordered time labels and the duration column name
# The analysis concentrates on three states: Delaware, Florida and California
state_lst = ['DE', 'FL', 'CA']
state_lst_full = ['Delaware', 'Florida', 'California']

# Only accident severities 2, 3 and 4 are considered
severity_lst = list(range(2, 5))

# Colors, markers and line styles for plotting (three entries each)
color_lst = list('rbk')
marker_lst = list('Do*')
linestyle_lst = ['dashed', 'dashdot', 'solid']

# Ordered month, weekday and hour labels, used for reindexing
month_lst = 'Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split()
weekday_lst_full = 'Monday Tuesday Wednesday Thursday Friday Saturday Sunday'.split()
weekday_lst = [day_name[:3] for day_name in weekday_lst_full]
hour_lst = np.arange(0, 24)

# Name of the column holding the accident duration in minutes
td = 'Time_Duration(min)'

In [8]:
# Fetch the US Accidents dataset from Kaggle.
# opendatasets (`od`) is imported with the other dependencies in the first code cell,
# so every dependency of the notebook is visible in one place.
download_url = 'https://www.kaggle.com/sobhanmoosavi/us-accidents'

# Files land in ./us-accidents; an existing download is skipped unless force=True is passed
od.download(download_url)

Skipping, found downloaded files in "./us-accidents" (use force=True to force download)


In [22]:
# Read the dataset into a pandas dataframe.
# The path is relative to the working directory, matching the ./us-accidents folder
# that od.download writes to, so the cell is not tied to Colab's /content directory.
df = pd.read_csv('./us-accidents/US_Accidents_Dec20_updated.csv')

# Keep only the states listed in state_lst. The explicit .copy() makes df an independent
# frame, so adding columns to it later does not trigger pandas' SettingWithCopyWarning.
df = df[df.State.isin(state_lst)].copy()


In [21]:
# Preview the first rows of df to check the loaded columns and values
df.head()

Unnamed: 0,ID,Severity,Start_Time,End_Time,Start_Lat,Start_Lng,End_Lat,End_Lng,Distance(mi),Description,Number,Street,Side,City,County,State,Zipcode,Country,Timezone,Airport_Code,Weather_Timestamp,Temperature(F),Wind_Chill(F),Humidity(%),Pressure(in),Visibility(mi),Wind_Direction,Wind_Speed(mph),Precipitation(in),Weather_Condition,Amenity,Bump,Crossing,Give_Way,Junction,No_Exit,Railway,Roundabout,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop,Sunrise_Sunset,Civil_Twilight,Nautical_Twilight,Astronomical_Twilight
1012,A-2717612,3,2016-03-22 18:53:11,2016-03-23 00:53:11,38.82584,-120.029214,38.827194,-120.030632,0.121,Between Twin Bridges Tract and South Lake Taho...,21101.0,US Highway 50,L,Echo Lake,El Dorado,CA,95721,US,US/Pacific,KTVL,2016-03-22 18:53:00,28.9,,85.0,30.18,10.0,Calm,,,Clear,False,False,False,False,False,False,False,False,False,False,False,False,False,Day,Day,Day,Day
1013,A-2717613,2,2016-03-22 19:00:49,2016-03-23 01:00:49,37.358209,-121.840017,37.361596,-121.842044,0.259,Between Capitol Expy and Alum Rock Ave - Accid...,,I-680 N,R,San Jose,Santa Clara,CA,95116,US,US/Pacific,KRHV,2016-03-22 18:48:00,57.2,,63.0,,10.0,WNW,17.3,,Scattered Clouds,False,False,False,False,False,False,False,False,False,False,False,False,False,Day,Day,Day,Day
1014,A-2717614,3,2016-03-22 20:07:32,2016-03-23 02:07:32,37.881943,-122.307987,37.885882,-122.308878,0.276,At I-80/I-580 Northern Split - Accident.,,I-580 W,R,Berkeley,Alameda,CA,94710,US,US/Pacific,KOAK,2016-03-22 19:53:00,57.0,,64.0,30.24,10.0,NW,8.1,,Partly Cloudy,False,False,False,False,False,False,False,False,False,False,False,False,False,Night,Night,Day,Day
1015,A-2717615,2,2016-03-22 21:40:18,2016-03-23 03:40:18,37.881038,-122.307788,37.883458,-122.308366,0.17,At I-80/I-580 Northern Split - Accident.,,I-580 W,R,Berkeley,Alameda,CA,94710,US,US/Pacific,KOAK,2016-03-22 21:53:00,55.0,,67.0,30.26,10.0,NW,8.1,,Clear,False,False,False,False,False,False,False,False,False,False,False,False,False,Night,Night,Night,Night
1016,A-2717616,2,2016-03-22 21:36:42,2016-03-23 03:36:42,38.518811,-121.101664,38.518811,-121.101664,0.0,Between Latrobe Rd/Indio Dr and Latrobe Rd - A...,14664.0,Latrobe Rd,R,Sloughhouse,Sacramento,CA,95683-9709,US,US/Pacific,KMHR,2016-03-22 20:45:00,55.4,,51.0,30.23,10.0,NNW,3.5,,Clear,False,False,False,False,False,False,False,False,False,False,False,False,False,Night,Night,Night,Night


In [23]:
# Derive calendar features and the accident duration from the start/end timestamps

# Parse both timestamp columns as datetimes; values that cannot be parsed become NaT
for time_col in ('Start_Time', 'End_Time'):
    df[time_col] = pd.to_datetime(df[time_col], errors='coerce')

# Calendar components of the accident start time
# ('%b' -> abbreviated month name, '%a' -> abbreviated weekday name)
start_parts = df['Start_Time'].dt
df['Start_Year'] = start_parts.year
df['Start_Month'] = start_parts.strftime('%b')
df['Start_Day'] = start_parts.day
df['Start_Hour'] = start_parts.hour
df['Start_Weekday'] = start_parts.strftime('%a')

# Duration of each accident in minutes, rounded to the nearest integer
td = 'Time_Duration(min)'
elapsed = df['End_Time'] - df['Start_Time']
df[td] = (elapsed / np.timedelta64(1, 'm')).round()

# Inspect the result
df.head()

Unnamed: 0,ID,Severity,Start_Time,End_Time,Start_Lat,Start_Lng,End_Lat,End_Lng,Distance(mi),Description,Number,Street,Side,City,County,State,Zipcode,Country,Timezone,Airport_Code,Weather_Timestamp,Temperature(F),Wind_Chill(F),Humidity(%),Pressure(in),Visibility(mi),Wind_Direction,Wind_Speed(mph),Precipitation(in),Weather_Condition,Amenity,Bump,Crossing,Give_Way,Junction,No_Exit,Railway,Roundabout,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop,Sunrise_Sunset,Civil_Twilight,Nautical_Twilight,Astronomical_Twilight,Start_Year,Start_Month,Start_Day,Start_Hour,Start_Weekday,Time_Duration(min)
1012,A-2717612,3,2016-03-22 18:53:11,2016-03-23 00:53:11,38.82584,-120.029214,38.827194,-120.030632,0.121,Between Twin Bridges Tract and South Lake Taho...,21101.0,US Highway 50,L,Echo Lake,El Dorado,CA,95721,US,US/Pacific,KTVL,2016-03-22 18:53:00,28.9,,85.0,30.18,10.0,Calm,,,Clear,False,False,False,False,False,False,False,False,False,False,False,False,False,Day,Day,Day,Day,2016,Mar,22,18,Tue,360.0
1013,A-2717613,2,2016-03-22 19:00:49,2016-03-23 01:00:49,37.358209,-121.840017,37.361596,-121.842044,0.259,Between Capitol Expy and Alum Rock Ave - Accid...,,I-680 N,R,San Jose,Santa Clara,CA,95116,US,US/Pacific,KRHV,2016-03-22 18:48:00,57.2,,63.0,,10.0,WNW,17.3,,Scattered Clouds,False,False,False,False,False,False,False,False,False,False,False,False,False,Day,Day,Day,Day,2016,Mar,22,19,Tue,360.0
1014,A-2717614,3,2016-03-22 20:07:32,2016-03-23 02:07:32,37.881943,-122.307987,37.885882,-122.308878,0.276,At I-80/I-580 Northern Split - Accident.,,I-580 W,R,Berkeley,Alameda,CA,94710,US,US/Pacific,KOAK,2016-03-22 19:53:00,57.0,,64.0,30.24,10.0,NW,8.1,,Partly Cloudy,False,False,False,False,False,False,False,False,False,False,False,False,False,Night,Night,Day,Day,2016,Mar,22,20,Tue,360.0
1015,A-2717615,2,2016-03-22 21:40:18,2016-03-23 03:40:18,37.881038,-122.307788,37.883458,-122.308366,0.17,At I-80/I-580 Northern Split - Accident.,,I-580 W,R,Berkeley,Alameda,CA,94710,US,US/Pacific,KOAK,2016-03-22 21:53:00,55.0,,67.0,30.26,10.0,NW,8.1,,Clear,False,False,False,False,False,False,False,False,False,False,False,False,False,Night,Night,Night,Night,2016,Mar,22,21,Tue,360.0
1016,A-2717616,2,2016-03-22 21:36:42,2016-03-23 03:36:42,38.518811,-121.101664,38.518811,-121.101664,0.0,Between Latrobe Rd/Indio Dr and Latrobe Rd - A...,14664.0,Latrobe Rd,R,Sloughhouse,Sacramento,CA,95683-9709,US,US/Pacific,KMHR,2016-03-22 20:45:00,55.4,,51.0,30.23,10.0,NNW,3.5,,Clear,False,False,False,False,False,False,False,False,False,False,False,False,False,Night,Night,Night,Night,2016,Mar,22,21,Tue,360.0
