In [2]:
# Dependencies

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Load the national smartphone-user statistics from disk.
smart_phone_data = pd.read_csv("Resources/smartphone-users-2010-2023.csv")

# Work on a separate DataFrame and discard every row that has a missing value.
smart_phone_data_df = pd.DataFrame(smart_phone_data).dropna(how='any')

# The CSV headers are a title string and an unnamed column; map them to
# readable column names.
smartphone_column_names = {
    'Smartphone users in the United States 2010-2023': 'Year',
    'Unnamed: 1': 'Total Smartphone Users (millions)',
}
total_smartphones_df = smart_phone_data_df.rename(columns=smartphone_column_names)
# NHTSA accident tables, 2004-2018.
#
# The four CSV files share one layout: a year column, twelve monthly columns
# (the first under the header 'Crash Date (Month)', the rest unnamed) and a
# final unnamed column holding the yearly total.  They are therefore loaded
# through one helper instead of four copies of the same code.

_MONTH_NAMES = ['January', 'February', 'March', 'April', 'May', 'June',
                'July', 'August', 'September', 'October', 'November', 'December']


def _nhtsa_column_names(year_header, total_name):
    """Build the rename map for one NHTSA accident CSV.

    Parameters
    ----------
    year_header : str
        Header of the year column exactly as it appears in the CSV.
    total_name : str
        Name to give the yearly-total column ('Unnamed: 13').

    Returns
    -------
    dict
        Mapping from the CSV headers to 'Year', the twelve month names and
        ``total_name``.
    """
    # January sits under 'Crash Date (Month)'; February..December are the
    # unnamed columns 2..12 and the yearly total is unnamed column 13.
    column_names = {year_header: 'Year', 'Crash Date (Month)': _MONTH_NAMES[0]}
    for position, month in enumerate(_MONTH_NAMES[1:], start=2):
        column_names['Unnamed: {}'.format(position)] = month
    column_names['Unnamed: 13'] = total_name
    return column_names


def _load_nhtsa_accidents(csv_path, year_header, total_name):
    """Read one NHTSA accident CSV and return it raw and cleaned.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file to read.
    year_header : str
        Header of the year column exactly as it appears in the CSV.
    total_name : str
        Name to give the yearly-total column.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The frame as read from disk, and a copy with every row containing a
        missing value removed and the columns renamed.

    Raises
    ------
    FileNotFoundError
        If ``csv_path`` does not exist (raised by ``pd.read_csv``).
    """
    raw = pd.read_csv(csv_path)
    cleaned = raw.dropna(how='any').rename(
        columns=_nhtsa_column_names(year_header, total_name))
    return raw, cleaned


# All drivers.
# NOTE(review): the year header is written with surrounding spaces for this
# file only; the other three files use 'Crash Date (Year)'.  rename() ignores
# keys that are not present, so a mismatch would surface as a KeyError on
# 'Year' in the selection below -- confirm against the CSV.
all_accidents, all_accidents_df = _load_nhtsa_accidents(
    "Resources/NHTSA - All Accidents - All Drivers - 2004-2018.csv",
    ' Crash Date (Year) ',
    'Total Accidents')
# Keep the monthly frame as-is and derive a separate yearly-total frame.
all_accidents_yearly_df = all_accidents_df.loc[:, ['Year', 'Total Accidents']]

# Young drivers (15-20), reduced to the year and yearly-total columns.
young_driver_accidents, young_driver_accidents_df = _load_nhtsa_accidents(
    "Resources/NHTSA - All Accidents - Young Drivers (15-20) 2004-2018.csv",
    'Crash Date (Year)',
    'Total Young Driver Accidents')
young_driver_accidents_df = young_driver_accidents_df.loc[:, ['Year', 'Total Young Driver Accidents']]

# Older drivers (65+), reduced to the year and yearly-total columns.
older_driver_accidents, older_driver_accidents_df = _load_nhtsa_accidents(
    "Resources/NHTSA - All Accidents - Older Drivers (65+) 2004-2018.csv",
    'Crash Date (Year)',
    'Total Older Driver Accidents')
older_driver_accidents_df = older_driver_accidents_df.loc[:, ['Year', 'Total Older Driver Accidents']]

# Remaining drivers (21-64), reduced to the year and yearly-total columns.
other_driver_accidents, other_driver_accidents_df = _load_nhtsa_accidents(
    "Resources/NHTSA - All Accidents - Drivers (21-64) 2004-2018.csv",
    'Crash Date (Year)',
    'Total Other Driver Accidents')
other_driver_accidents_df = other_driver_accidents_df.loc[:, ['Year', 'Total Other Driver Accidents']]

# Left-join the three age-group totals onto the all-driver totals, one table
# at a time, matching rows on "Year".
merge_table_df = all_accidents_yearly_df.merge(young_driver_accidents_df, on="Year", how="left")
merge_table_df2 = merge_table_df.merge(older_driver_accidents_df, on="Year", how="left")
all_accident_graph_df = merge_table_df2.merge(other_driver_accidents_df, on="Year", how="left")

# Cast every total column to float in one pass; "Year" keeps its dtype.
all_accident_graph_df = all_accident_graph_df.astype({
    'Total Accidents': float,
    'Total Young Driver Accidents': float,
    'Total Older Driver Accidents': float,
    'Total Other Driver Accidents': float,
})

# Calendar years used as the x-axis: the full 2004-2018 span and the
# 2010-2018 sub-span.
year = list(range(2004, 2019))
year2 = list(range(2010, 2019))

# Smartphone users in millions, one value per year in year2.
# NOTE(review): presumably transcribed by hand from the smartphone CSV -- confirm.
total_smartphones = [62.6, 92.8, 122, 144.5, 171.0, 190.64, 208.61, 246.6, 257.3]

# Yearly accident totals, one series per driver group.  The x-axis uses the
# hard-coded year list above rather than the frame's "Year" column.
total_drivers = all_accident_graph_df.loc[:, "Total Accidents"]
younger_drivers = all_accident_graph_df.loc[:, "Total Young Driver Accidents"]
older_drivers = all_accident_graph_df.loc[:, "Total Older Driver Accidents"]
all_other_drivers = all_accident_graph_df.loc[:, "Total Other Driver Accidents"]


# Line chart of yearly accident totals, one line per driver group.
x = year
y0 = total_drivers
y1 = younger_drivers
y2 = older_drivers
y3 = all_other_drivers

fig = go.Figure()

# One trace per driver group.  The young-driver cohort is 15-20 (see the
# source file name), so its label must not overlap the 21-64 group.
fig.add_trace(go.Scatter(x=x, y=y0,
                    mode='lines',
                    name='All Drivers'))
fig.add_trace(go.Scatter(x=x, y=y1,
                    mode='lines',
                    name='Drivers: Ages 15-20'))
fig.add_trace(go.Scatter(x=x, y=y2,
                    mode='lines',
                    name='Drivers: Ages 65+'))
fig.add_trace(go.Scatter(x=x, y=y3,
                    mode='lines',
                    name='Drivers: Ages 21-64'))

# Title and axis labels.
fig.update_layout(title='U. S. Automobile Accidents (Including Fatalities) - All States - All Drivers',
                   xaxis_title='Year',
                   yaxis_title='Accidents (millions)')

fig.show()

FileNotFoundError: [Errno 2] File b'Resources/smartphone-users-2010-2023.csv' does not exist: b'Resources/smartphone-users-2010-2023.csv'

In [None]:
# Restrict the merged accident table to 2010-2018 by discarding rows by position.
# NOTE(review): positions 0-5 are presumably 2004-2009 and position 15 a
# trailing totals row -- confirm against the CSV layout.
rows_to_drop = all_accident_graph_df.index[[0, 1, 2, 3, 4, 5, 15]]

# Drop those rows and index the result by year in a single expression.
reduced_all_accident_graph_df = all_accident_graph_df.drop(rows_to_drop).set_index('Year')


In [None]:
# Graph the reduced time frame (2010 - 2018, the onset of smartphones):
# Total Accidents versus Total Smartphones

# Per-group accident series for 2010 - 2018; the following scatter cells
# reuse these names.
total_drivers2 = reduced_all_accident_graph_df['Total Accidents']
younger_drivers2 = reduced_all_accident_graph_df['Total Young Driver Accidents']
older_drivers2 = reduced_all_accident_graph_df['Total Older Driver Accidents']
all_other_drivers2 = reduced_all_accident_graph_df['Total Other Driver Accidents']

smartphones = total_smartphones
accidents = total_drivers2

# x and y are supplied as array-likes, so no data_frame is passed: the full
# accident table has a different number of rows than these 2010 - 2018 values.
fig = px.scatter(x=smartphones, y=accidents, trendline="ols")

# Take R-squared from the fitted OLS model so the title cannot drift from the data.
results = px.get_trendline_results(fig)
r_squared = results.px_fit_results.iloc[0].rsquared

# Title and axis labels.
fig.update_layout(title='Total Accidents versus Total Smartphones: 2010 - 2018                           (R-squared = {:.6f})'.format(r_squared),
                   xaxis_title='Smartphones (millions)',
                   yaxis_title='Accidents (millions)')

fig.show()


In [None]:
# Graph accidents involving older drivers versus Total Smartphones: 2010 - 2018

smartphones = total_smartphones
accidents = older_drivers2

# x and y are supplied as array-likes, so no data_frame is passed: the full
# accident table has a different number of rows than these 2010 - 2018 values.
fig = px.scatter(x=smartphones, y=accidents, trendline="ols")

# Take R-squared from the fitted OLS model so the title cannot drift from the data.
results = px.get_trendline_results(fig)
r_squared = results.px_fit_results.iloc[0].rsquared

# Title and axis labels.
fig.update_layout(title='Accidents: Drivers Aged 65+ versus Total Smartphones: 2010 - 2018        (R-squared = {:.6f})'.format(r_squared),
                   xaxis_title='Smartphones (millions)',
                   yaxis_title='Accidents (millions)')

fig.show()

In [None]:
# Graph accidents involving younger drivers versus Total Smartphones: 2010 - 2018

smartphones = total_smartphones
accidents = younger_drivers2

# x and y are supplied as array-likes, so no data_frame is passed: the full
# accident table has a different number of rows than these 2010 - 2018 values.
fig = px.scatter(x=smartphones, y=accidents, trendline="ols")

# Take R-squared from the fitted OLS model so the title cannot drift from the data.
results = px.get_trendline_results(fig)
r_squared = results.px_fit_results.iloc[0].rsquared

# Title and axis labels.
fig.update_layout(title='Accidents: Drivers Aged 15-20 versus Total Smartphones: 2010 - 2018         (R-squared = {:.6f})'.format(r_squared),
                   xaxis_title='Smartphones (millions)',
                   yaxis_title='Accidents (millions)')

fig.show()

In [None]:
# Graph accidents involving drivers aged 21-64 versus Total Smartphones: 2010 - 2018

smartphones = total_smartphones
accidents = all_other_drivers2

# x and y are supplied as array-likes, so no data_frame is passed: the full
# accident table has a different number of rows than these 2010 - 2018 values.
fig = px.scatter(x=smartphones, y=accidents, trendline="ols")

# Take R-squared from the fitted OLS model so the title cannot drift from the data.
results = px.get_trendline_results(fig)
r_squared = results.px_fit_results.iloc[0].rsquared

# Title and axis labels.
fig.update_layout(title='Accidents: Drivers Aged 21-64 versus Total Smartphones: 2010 - 2018   (R-squared = {:.6f})'.format(r_squared),
                   xaxis_title='Smartphones (millions)',
                   yaxis_title='Accidents (millions)')

fig.show()

In [None]:
# Load the distracted-driver fatality table from disk.
distracted_driver_data = pd.read_csv(
    "Resources/Fatalities - Distracted Drivers.csv"
)
# Keep the raw frame untouched and work on a separate DataFrame object.
distracted_driver_data_df = pd.DataFrame(data=distracted_driver_data)


In [None]:
# Discard every row (axis 0) that contains at least one missing value.
distracted_driver_data_df = distracted_driver_data_df.dropna(axis=0, how='any')

In [None]:
# Restrict the fatality table to 2010-2018 by discarding the first six rows by position.
# NOTE(review): positions 0-5 are presumably 2004-2009 -- confirm against the CSV.
leading_rows = distracted_driver_data_df.index[[0, 1, 2, 3, 4, 5]]

# Drop those rows and index the result by year in a single expression.
reduced_distracted_driver_data_df = distracted_driver_data_df.drop(leading_rows).set_index('Year')

# Display the reduced table as the cell output.
reduced_distracted_driver_data_df

In [None]:
# Line chart: distracted-driver fatalities versus all fatalities, per year.

# Series come from the full (2004-2018) fatality table; the x-axis is the
# hard-coded `year` list defined in the first cell.
total_distracted_fatalities = distracted_driver_data_df["All Drivers - Distracted Fatalities "]
total_fatalities = distracted_driver_data_df["All Drivers - Total Fatalities"]
# Selected but not plotted in this figure.
percent_distracted = distracted_driver_data_df["% of Total - All Drivers"]

x = year
y0 = total_distracted_fatalities
y1 = total_fatalities

fig = go.Figure()

# One trace per series.
fig.add_trace(go.Scatter(x=x, y=y0,
                    mode='lines',
                    name='Distracted Driver Fatalities'))

fig.add_trace(go.Scatter(x=x, y=y1,
                    mode='lines',
                    name='Total Fatalities'))

# Title and axis labels.
fig.update_layout(title='Distracted Driver Fatalities Versus All Fatalities',
                   xaxis_title='Year',
                   yaxis_title='Fatalities (thousands)')

fig.show()

In [None]:
# Line chart of distracted-driver fatalities, one line per driver group.

# NOTE: these names were bound to the accident series in the first cell and
# are rebound here to the distracted-fatality series.
total_drivers = distracted_driver_data_df["All Drivers - Distracted Fatalities "]
younger_drivers = distracted_driver_data_df["Younger Drivers - Distracted Fatalities "]
older_drivers = distracted_driver_data_df["Older Drivers - Distracted Fatalities "]
all_other_drivers = distracted_driver_data_df["All Other Drivers - Distracted Fatalities "]

x = year
y0 = total_drivers
y1 = younger_drivers
y2 = older_drivers
y3 = all_other_drivers

fig = go.Figure()

# One trace per driver group.  The young-driver label uses 15-20 so that it
# does not overlap the 21-64 group and matches the 15-20 cohort used elsewhere
# in this notebook.
fig.add_trace(go.Scatter(x=x, y=y0,
                    mode='lines',
                    name='All Drivers'))
fig.add_trace(go.Scatter(x=x, y=y1,
                    mode='lines',
                    name='Drivers: Ages 15-20'))
fig.add_trace(go.Scatter(x=x, y=y2,
                    mode='lines',
                    name='Drivers: Ages 65+'))
fig.add_trace(go.Scatter(x=x, y=y3,
                    mode='lines',
                    name='Drivers: Ages 21-64'))

# Title and axis labels.
fig.update_layout(title='U. S. Automobile Accidents Fatalities - Distracted Drivers',
                   xaxis_title='Year',
                   yaxis_title='Fatalities (thousands)')

fig.show()

In [None]:
# Graph the reduced time frame (2010 - 2018, the onset of smartphones):
# Distracted Driver Fatalities versus Total Smartphones

distracted_drivers = reduced_distracted_driver_data_df["All Drivers - Distracted Fatalities "]

smartphones = total_smartphones

fig = px.scatter(reduced_distracted_driver_data_df, x=smartphones, y=distracted_drivers, trendline="ols")

# Take R-squared from the fitted OLS model so the title cannot drift from the data.
results = px.get_trendline_results(fig)
r_squared = results.px_fit_results.iloc[0].rsquared

# Title and axis labels; the y-axis shows fatalities, not accidents.
fig.update_layout(title='Distracted Driver Fatalities versus Total Smartphones: 2010 - 2018          (R-squared = {:.6f})'.format(r_squared),
                   xaxis_title='Smartphones (millions)',
                   yaxis_title='Fatalities (thousands)')

fig.show()