# Assessing the Impact of Daylight Saving Time on Driving Fatalities

- Anastasiades, Andrew
- Drozdzewicz, Peter
- Garcia, Viera
- Sperry, Taylor
- Surapanemi, Sindhura

### DEPENDENCIES

In [1]:
import pandas as pd #data manipulation
import os #Interoperability
import matplotlib.pyplot as plt
import scipy.stats as st
import numpy as np
import datetime as dt

### FILE PATHS

In [2]:
# Relative path to the 2019 accident extract: directory first, then file name.
_accident_csv_parts = ("Resources", "2019.csv")
accident_csv = os.path.join(*_accident_csv_parts)


### LOAD DATAFRAME

In [3]:
# Exploratory load of the complete file, used here only to list the available
# columns (per the original note, this is not meant to stay in the final code).
#
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk.
# NOTE(review): the warning fragment recorded under this cell looks like the
# tail of a mixed-type DtypeWarning from the default chunked parser — confirm
# that it no longer appears after this change.
raw_df = pd.read_csv(accident_csv, low_memory=False)
raw_df.columns

  interactivity=interactivity, compiler=compiler, result=result)


Index(['STATE', 'STATENAME', 'ST_CASE', 'VE_TOTAL', 'VE_FORMS', 'PVH_INVL',
       'PEDS', 'PERSONS', 'PERMVIT', 'PERNOTMVIT', 'COUNTY', 'COUNTYNAME',
       'CITY', 'CITYNAME', 'DAY', 'DAYNAME', 'MONTH', 'MONTHNAME', 'YEAR',
       'DAY_WEEK', 'DAY_WEEKNAME', 'HOUR', 'HOURNAME', 'MINUTE', 'MINUTENAME',
       'NHS', 'NHSNAME', 'ROUTE', 'ROUTENAME', 'TWAY_ID', 'TWAY_ID2',
       'RUR_URB', 'RUR_URBNAME', 'FUNC_SYS', 'FUNC_SYSNAME', 'RD_OWNER',
       'RD_OWNERNAME', 'MILEPT', 'MILEPTNAME', 'LATITUDE', 'LATITUDENAME',
       'LONGITUD', 'LONGITUDNAME', 'SP_JUR', 'SP_JURNAME', 'HARM_EV',
       'HARM_EVNAME', 'MAN_COLL', 'MAN_COLLNAME', 'RELJCT1', 'RELJCT1NAME',
       'RELJCT2', 'RELJCT2NAME', 'TYP_INT', 'TYP_INTNAME', 'WRK_ZONE',
       'WRK_ZONENAME', 'REL_ROAD', 'REL_ROADNAME', 'LGT_COND', 'LGT_CONDNAME',
       'WEATHER1', 'WEATHER1NAME', 'WEATHER2', 'WEATHER2NAME', 'WEATHER',
       'WEATHERNAME', 'SCH_BUS', 'SCH_BUSNAME', 'RAIL', 'RAILNAME', 'NOT_HOUR',
       'NOT_HOURNAME', 'NOT

In [4]:
# Load only the fields that the rest of the notebook reads.
columns_to_keep = [
    "STATENAME",
    "DAY",
    "MONTH",
    "YEAR",
    "HOUR",
    "DAY_WEEK",
    "DAY_WEEKNAME",
    "LGT_CONDNAME",
    "DRUNK_DR",
    "WEATHERNAME",
    "RUR_URBNAME",
    "FATALS",
]
df = pd.read_csv(filepath_or_buffer=accident_csv, usecols=columns_to_keep)

### CLEAN DATAFRAME

In [5]:
# EXCLUDE DRUNK DRIVERS
# TODO(review): the original note asks why this exclusion is applied; record
# the rationale here once it is agreed.
# Keep only the rows whose DRUNK_DR value is 0, then discard the column, which
# is not used after this filter.
no_drunk_driver = df["DRUNK_DR"] == 0
df = df[no_drunk_driver].drop(columns=["DRUNK_DR"])

In [None]:
# ADD DATE AND WEEKNUM
# We do this so we can compare week-to-week changes.
df["DATE"] = pd.to_datetime(df[["YEAR", "MONTH", "DAY"]])

# WEEK_NUM counts Sunday-to-Saturday weeks within the calendar year; week 1 is
# the week that contains January 1.
#
# The previous approach (ISO week of DATE + 1 day) gives the same numbers for
# 2019 except at the very end of the year: 2019-12-29..31 rolled over into ISO
# week 1 of 2020 and were therefore merged with 2019-01-01..05. Here those
# days become week 53 instead.
#
# dayofyear - days_since_sunday is the day-of-year of the Sunday that opens the
# date's week (it lies in -5..1 for the first week); adding 12 and
# floor-dividing by 7 maps -5..1 -> 1, 2..8 -> 2, and so on.
days_since_sunday = (df["DATE"].dt.dayofweek + 1) % 7  # Sunday=0 ... Saturday=6
df["WEEK_NUM"] = (df["DATE"].dt.dayofyear - days_since_sunday + 12) // 7

In [None]:
# SPLIT SAMPLE AND CONTROL
# Per the original note, these states do not observe daylight saving time,
# so they serve as the control group.
control_states = ["Arizona", "Hawaii"]
in_control = df["STATENAME"].isin(control_states)
control_df = df.loc[in_control]  # control: listed states only
sample_df = df.loc[~in_control]  # sample: every other state

In [None]:
# Display the sample frame (rows whose STATENAME is not in control_states)
# for a quick visual check.
sample_df

In [None]:
# Daily fatality totals for the sample states, viewed over the seven days
# 2019-03-03 through 2019-03-09.
start_date = "2019-03-03"
end_date = "2019-03-09"

# One row per DATE holding the sum of FATALS on that day.
sample_gb = sample_df.groupby("DATE")
summary = sample_gb["FATALS"].sum().to_frame().reset_index()

summary.plot.line(x="DATE", y="FATALS")
plt.xlim((start_date, end_date))
plt.show()

In [None]:
# Daily fatality totals for the sample states, viewed over the seven days
# 2019-03-10 through 2019-03-16.
start_date = '2019-03-10'
end_date = '2019-03-16'

# Rebuild the daily totals from sample_df in this cell. Other cells assign
# `summary` from control_df, so reusing whatever `summary` currently holds
# would plot the wrong group if cells are run out of order.
sample_gb = sample_df.groupby("DATE")
summary = sample_gb["FATALS"].sum().reset_index()

summary.plot(x="DATE", y="FATALS", kind="line")
plt.xlim(start_date, end_date)
plt.show()

In [None]:
# Daily fatality totals for the control states, viewed over the seven days
# 2019-03-03 through 2019-03-09.
start_date = "2019-03-03"
end_date = "2019-03-09"

# NOTE: the groupby is stored under the name `sample_gb` although it is built
# from control_df; the name is kept as-is.
sample_gb = control_df.groupby("DATE")
summary = sample_gb["FATALS"].sum().to_frame().reset_index()

summary.plot.line(x="DATE", y="FATALS")
plt.xlim((start_date, end_date))
plt.show()

In [None]:
# Daily fatality totals for the control states, viewed over the seven days
# 2019-03-10 through 2019-03-16. The window bounds are stored as Timestamps.
start_date = pd.to_datetime("2019-03-10")
end_date = pd.to_datetime("2019-03-16")

# NOTE: the groupby is stored under the name `sample_gb` although it is built
# from control_df; the name is kept as-is.
sample_gb = control_df.groupby("DATE")
summary = sample_gb["FATALS"].sum().to_frame().reset_index()

summary.plot.line(x="DATE", y="FATALS")
plt.xlim((start_date, end_date))
plt.show()

### REVIEW WEEK-OVER-WEEK CHANGE IN FATALITIES

In [None]:
# Total fatalities per WEEK_NUM for the sample states.
week_gb = sample_df.groupby(by=["WEEK_NUM"])
week_summary = pd.DataFrame(week_gb["FATALS"].sum())

# Relative change versus the previous calendar week: this_week / last_week - 1.
# The previous week is looked up by label (WEEK_NUM - 1), so the comparison
# stays aligned to the calendar. Where that week has no row (the first week
# present, or a gap) the change is NaN rather than a KeyError.
last_week = week_summary["FATALS"].reindex(week_summary.index - 1).values
weekly_change = (week_summary["FATALS"] / last_week - 1).tolist()
week_summary["WEEKLY_CHANGE"] = weekly_change

# Standardise the weekly changes; NaN entries are left out of the mean/std.
week_summary["Z_SCORE"] = st.zscore(weekly_change, nan_policy='omit')

# Drop NaN explicitly before plotting instead of relying on how the installed
# matplotlib version treats NaN when it computes histogram bins.
plt.hist(week_summary["WEEKLY_CHANGE"].dropna(), density=True)
plt.show()


In [None]:
# Week number of start_date under the same convention as WEEK_NUM: shift the
# date forward one day before taking the ISO week, so that a Sunday counts as
# the first day of its week. pd.Timestamp(...) also accepts the plain string
# form of start_date that some earlier cells assign.
(pd.Timestamp(start_date) + pd.Timedelta(days=1)).week


In [None]:
# Display the per-week table built above (FATALS, WEEKLY_CHANGE, Z_SCORE).
week_summary

In [None]:
# Sample-state rows with WEEK_NUM 11 (the Sunday-based week beginning
# 2019-03-10), in date order.
in_week_11 = sample_df["WEEK_NUM"] == 11
sample_df.loc[in_week_11].sort_values("DATE")

In [None]:
# Sunday 2019-11-03 as a Timestamp.
# NOTE(review): presumably the autumn clock-change date used for the
# November comparison — confirm.
nov_start = pd.Timestamp("2019-11-03")

In [None]:
# Sample-state rows with WEEK_NUM 45 (the Sunday-based week beginning
# 2019-11-03), in date order.
in_week_45 = sample_df["WEEK_NUM"] == 45
sample_df.loc[in_week_45].sort_values("DATE")