In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Data Visualization

The `Traffic_Crashes_-_Crashes.csv` file contains information about each traffic crash on city streets within the City of Chicago limits and under the jurisdiction of the Chicago Police Department (CPD).
It can be downloaded from this [link](https://data.cityofchicago.org/Transportation/Traffic-Crashes-Crashes/85ca-t3if).

In [None]:
# Location of the downloaded crash export. It is written as a raw string so the
# Windows backslashes are taken literally: the previous literal contained the
# invalid escape sequence `\j`, which recent Python versions warn about.
# NOTE(review): this is an absolute, machine-specific path -- point it at
# wherever the CSV was saved before running the notebook.
CRASHES_CSV = r'C:\Users\javier.perez-alvaro\Downloads\Traffic_Crashes_-_Crashes.csv'

# Use the crash timestamp as a parsed datetime index; the later cells rely on
# it for `.resample(...)` and `.index.year` / `.index.dayofweek`.
data = pd.read_csv(CRASHES_CSV, index_col='CRASH_DATE', parse_dates=True)
data.head()

In [None]:
# Put the crashes in chronological order (the index is the crash date).
# Reassigning the sorted frame instead of using `inplace=True` keeps the cell a
# plain "input -> output" step and follows the usual pandas idiom.
data = data.sort_index()
data.head()

In [None]:
# keep only the crashes dated 2018 or later (everything before 2018 is dropped)
from_2018 = data.index.year >= 2018
data = data[from_2018]

In [None]:
# inspect which columns the crash table provides
column_names = data.columns
column_names

## How has the weekly number of crashes changed over time?

In [None]:
# count the crash records falling in each week, then draw the weekly series
crashes_per_week = data['CRASH_RECORD_ID'].resample('W').count()
crashes_per_week.plot(figsize=(15, 5))

## How has the injury rate changed over time?

In [None]:
# Flag column: INJURIES is 1 when the crash has at least one injury, else 0.
# A missing INJURIES_TOTAL compares as False here, so such rows get 0.
has_injury = data['INJURIES_TOTAL'].ge(1)
data['INJURIES'] = has_injury.astype(int)
data['INJURIES']

In [None]:
# weekly crash counts with one column (and therefore one line) per INJURIES value
weekly_by_injury = (
    data.groupby('INJURIES')
    .resample('W')['CRASH_RECORD_ID']
    .count()
    .unstack(level=0)
)
weekly_by_injury.plot(figsize=(15, 5))

In [None]:
# weekly count of the crashes that involve injuries (INJURIES == 1)
injury_crashes = data.loc[data['INJURIES'] == 1]
n_injuries = injury_crashes['CRASH_RECORD_ID'].resample('W').count()
n_injuries

In [None]:
# weekly count of all crashes, with or without injuries
n_total = data['CRASH_RECORD_ID'].resample('W').count()
n_total

In [None]:
# percentage of each week's crashes that involve injuries
# (the two series are aligned on their weekly index before dividing)
pct_weekly_injuries = n_injuries.mul(100).div(n_total)
pct_weekly_injuries.plot(figsize=(15, 5))

## How does the injury rate change through the week?

In [None]:
# Crash counts per day of the week (pandas numbers them 0 = Monday ... 6 = Sunday),
# split by the INJURIES flag. fill_value=0 makes a weekday/flag combination with
# no crashes count as 0 instead of NaN, so the percentage below is always defined.
table = data.pivot_table(
    values='CRASH_RECORD_ID',
    index=data.index.dayofweek,
    columns='INJURIES',
    aggfunc='count',
    fill_value=0,
)
# total crashes per weekday = crashes without injuries + crashes with injuries
table['total'] = table.sum(axis=1)
# share of that weekday's crashes that involve injuries, in percent
table['pct_injuries'] = 100 * table[1] / table['total']
# Translate each weekday number by lookup rather than by positional assignment,
# so the labels stay correct (and no length error is raised) even if some
# weekday does not occur in the data.
day_names = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
table.index = [day_names[day] for day in table.index]
table

In [None]:
# bar chart of the injury percentage, one bar per row of `table`
table['pct_injuries'].plot(kind='bar')

## Plot daily crashes histogram

In [None]:
# distribution of the number of crashes per day, drawn with 20 bins
crashes_per_day = data['CRASH_RECORD_ID'].resample('D').count()
crashes_per_day.hist(bins=20)

## How do injuries vary with first crash type?

In [None]:
# Crash counts per first-crash type, split by the INJURIES flag.
# fill_value=0 matters here: a crash type that never occurs with (or without)
# injuries would otherwise hold NaN in that column, making its percentage NaN
# and silently dropping its bar from the chart.
table = data.pivot_table(
    values='CRASH_RECORD_ID',
    index='FIRST_CRASH_TYPE',
    columns='INJURIES',
    aggfunc='count',
    fill_value=0,
)
# total crashes of each type = crashes without injuries + crashes with injuries
table['total'] = table.sum(axis=1)
# share of each type's crashes that involve injuries, in percent
table['pct_injuries'] = 100 * table[1] / table['total']
table

In [None]:
# wide bar chart of the injury percentage, one bar per row of `table`
table['pct_injuries'].plot(kind='bar', figsize=(15, 5))

## Are injuries more likely in different locations?

In [None]:
# rows whose latitude and longitude are both different from 0
# (presumably 0 marks an unknown location -- confirm against the dataset docs)
mask = data['LATITUDE'].ne(0) & data['LONGITUDE'].ne(0)
# one very small, semi-transparent dot per crash with injuries
injury_locations = data[mask & (data['INJURIES'] == 1)]
injury_locations.plot.scatter(
    x='LONGITUDE',
    y='LATITUDE',
    figsize=(7, 7),
    s=0.1,
    alpha=0.5,
)

In [None]:
# same selection of injury crashes, binned on a hexagonal grid and coloured
# with the 'Reds' colormap
injury_locations = data[(data['INJURIES'] == 1) & mask]
injury_locations.plot.hexbin(
    x='LONGITUDE',
    y='LATITUDE',
    gridsize=20,
    figsize=(7, 7),
    cmap='Reds',
)

## Area plot

In [None]:
# starting point for this section: the weekly number of crashes as a single line
weekly_counts = data['CRASH_RECORD_ID'].resample('W').count()
weekly_counts.plot(figsize=(15, 5))

In [None]:
# weekly crash counts with one column (one line) per first-crash type;
# the legend is switched off
weekly_by_type = (
    data.groupby('FIRST_CRASH_TYPE')
    .resample('W')['CRASH_RECORD_ID']
    .count()
    .unstack(level=0)
)
weekly_by_type.plot(figsize=(15, 7), legend=False)

In [None]:
# same weekly counts per first-crash type, now as an area plot:
# the series are drawn stacked on top of each other
stacked_by_type = (
    data.groupby('FIRST_CRASH_TYPE')
    .resample('W')['CRASH_RECORD_ID']
    .count()
    .unstack(level=0)
)
stacked_by_type.plot(kind='area', figsize=(15, 7))

## Advanced plot

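Pandas plotting methods are built on the `matplotlib` library, so a plot can be customized further through matplotlib's figure and axis objects.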

In [None]:
import matplotlib.pyplot as plt

In [None]:
# weekly crash counts split by injury status: column 0 = no injuries, column 1 = injuries
weekly = (
    data.groupby('INJURIES')
    .resample('W')['CRASH_RECORD_ID']
    .count()
    .unstack(level=0)
)
weekly

In [None]:
# one figure with a single axis; both injury classes are drawn on it
label_size = 15
fig, axis = plt.subplots(figsize=(15, 5))
weekly.plot(ax=axis)

# title and axis labels (set after plotting so they replace the defaults)
axis.set_title('Number of traffic crashes per week', fontsize=20)
axis.set_xlabel('date', fontsize=label_size)
axis.set_ylabel('weekly number of crashes', fontsize=label_size)

# relabel the legend entries (columns 0 / 1) as no / yes
axis.legend(
    labels=['no', 'yes'],
    title='injuries?',
    loc='upper left',
    fontsize=label_size,
    title_fontsize=18,
)

# background grid
axis.grid(True)

In [None]:
# one figure with two panels side by side, one per injury class
fig, axis = plt.subplots(1, 2, figsize=(15, 5))
fig.suptitle('Number of traffic crashes per week', fontsize=20)

# (column of `weekly`, panel title, extra line options) for each panel;
# the second panel is drawn in red, the first keeps the default colour
panels = [
    (0, 'injuries = 0', {}),
    (1, 'injuries = 1', {'color': 'red'}),
]
for position, (flag, panel_title, line_options) in enumerate(panels):
    panel = axis[position]
    weekly[flag].plot(ax=panel, **line_options)
    panel.set_xlabel('date', fontsize=15)
    panel.set_title(panel_title)
    panel.grid(True)

In [None]:
# one figure whose two y-axes share the same x-axis:
# `axis` (left scale) is used for injuries=0, `axis2` (right scale) for injuries=1
fig, axis = plt.subplots(figsize=(15, 5))
axis2 = axis.twinx()

# draw each injury class against its own y-axis; 'C1' is the second colour
# of the default colour cycle, so the two lines are distinguishable
weekly[0].plot(ax=axis)
weekly[1].plot(ax=axis2, color='C1')

# title, shared x label, and one y label per scale
axis.set_title('Number of traffic crashes per week', fontsize=20)
axis.set_xlabel('date', fontsize=15)
axis.set_ylabel('injuries=0', fontsize=15)
axis2.set_ylabel('injuries=1', fontsize=15)

# a figure-level legend, so the lines of both axes are listed together
fig.legend(
    labels=['no', 'yes'],
    title='injuries?',
    loc='upper left',
    fontsize=15,
    title_fontsize=18,
)

# grid lines on the first axis
axis.grid(True)