# EDA & Visualisation

In [None]:
%reset

In [None]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Common imports
import os
import timeit
import numpy as np
import pandas as pd
import seaborn as sns
from math import sqrt
from datetime import date
import holidays
# Apply seaborn's default theme.
# NOTE(review): the rcParamsDefault reset further down appears to undo this styling — confirm intent.
sns.set()
import warnings
# Silence every warning for the rest of the session (this also hides
# deprecation notices from pandas / matplotlib / seaborn).
warnings.filterwarnings("ignore")

# To plot pretty figures
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Restore all Matplotlib settings to the library defaults, then pick the font.
mpl.rcParams.update(mpl.rcParamsDefault)
mpl.rcParams["font.family"] = "serif"
# NOTE(review): with font.family set to "serif" the sans-serif list below is
# presumably not used for rendering — confirm.
mpl.rcParams["font.sans-serif"] = "Verdana"
# mpl.rcParams["lines.markersize"] = 20

## Incident File

Incident data of Kwinana Fwy (Inner) northbound from 2018-01-01 00:00:00 to 2018-10-25 23:59:00

In [None]:
import calendar

# Show the whole of 2018 as a plain-text calendar.
year_text = calendar.calendar(2018)
print(year_text)

In [None]:
# Load the cleaned incident log, discard rows flagged as duplicate entries
# and renumber the remaining rows from zero.
incident_csv = 'data/clean/Kwinana_Fwy_Historic_Incidents_2018_Wide.csv'
df1 = (
    pd.read_csv(incident_csv)
    .loc[lambda d: d['Incident_Type'] != 'Duplicate Entry']
    .reset_index(drop=True)
)

# Parse the incident start / end timestamps.
for time_col in ('WST_Start', 'WST_End'):
    df1[time_col] = pd.to_datetime(df1[time_col])

# Insert a line break after every slash in the type names (the names are
# used as axis labels in the charts below).
df1['Incident_Type'] = df1['Incident_Type'].str.replace('/', '/\n')
df1.info()

In [None]:
# Distinct calendar dates on which at least one incident started
incident_start_dates = df1['WST_Start'].dt.date
incident_start_dates.unique()

### Visualisation

In [None]:
# Horizontal bar chart: number of incidents per (unmerged) incident type.
type_counts = df1['Incident_Type'].value_counts()

fig, ax = plt.subplots(figsize=(6, 6))
ax = sns.barplot(x=type_counts, y=type_counts.index, ax=ax)
ax.set_xlim(right=200)
ax.set_xlabel('Frequency')

# Write the count just past the end of every bar.
for bar in ax.patches:
    bar_length = bar.get_width()
    bar_middle = bar.get_y() + bar.get_height()/2
    ax.text(bar_length + 1, bar_middle, int(bar_length),
            ha="left", va="center")
#plt.savefig('fig/incident_type_1.png', bbox_inches="tight")
plt.show()

Since Special Event, Pothole / Road Surface Damage, and Hazmat (including spills) did not affect traffic congestion, these three categories are combined into a single category called 'Special Event / Pothole / Hazmat'.

Flooding and Storm are likewise combined into a single category called 'Flooding / Storm'.

In [None]:
# Collapse several incident types into combined categories. The labels carry
# the line breaks that were inserted after each slash when the data was loaded.
merged_types = (
    (['Special Event',
      'Pothole /\n Road Surface Damage',
      'Hazmat (including spills)'],
     'Special Event /\nPothole / Hazmat'),
    (['Flooding', 'Storm'],
     'Flooding /\nStorm'),
)
for old_labels, new_label in merged_types:
    df1['Incident_Type'] = df1['Incident_Type'].replace(old_labels, new_label)

In [None]:
# Bar chart of incidents per merged incident type, annotated with the count
# and its share of all incidents; the figure is written to fig/incident_type.png.
cmp = mpl.colors.ListedColormap(['#4053d3', '#00b25d', '#b51d14', '#ddb310'])

plt.rcParams.update({'font.size': 16})
fig, ax = plt.subplots(figsize=(10,8))

# Count once, and take the percentage base from the data instead of a
# hard-coded total so the labels stay correct if the input file changes.
type_counts = df1.Incident_Type.value_counts()
n_incidents = type_counts.sum()

ax = sns.barplot(x=type_counts,
                 y=type_counts.index,
                 palette=['#4053d3', '#00b25d', '#b51d14', '#ddb310'])
ax.set_ylabel('Incident Type', size=18)
ax.set_xlabel('Frequency', size=18)
ax.set_xlim(right=200)
for p in ax.patches:
  width = p.get_width()
  # Label sits to the right of the bar end: "<count>\n(<share>%)".
  ax.text(width + 12,
          p.get_y() + p.get_height()/2,
          str(int(width)) + '\n(' + str(np.round(width/n_incidents*100, 1)) + '%)',
          ha="center",
          va="center")
plt.savefig('fig/incident_type.png', bbox_inches="tight")
plt.show()

In [None]:
# Fold the low-frequency categories into a single 'Miscellaneous' type.
# (The original list named 'Special Event /\nPothole / Hazmat' twice; the
# redundant entry is removed.)
# NOTE(review): if the duplicate was meant to be a different category, add it here.
df1.Incident_Type = df1.Incident_Type.replace([
    'Flooding /\nStorm',
    'Special Event /\nPothole / Hazmat',
    'Animal /\n Livestock',
    'Vehicle Fire'], 'Miscellaneous')

In [None]:
# Stacked horizontal bars: incidents per type, split by the 'Congestion' column.
# The figure is written to fig/incident_congestion_2.png.
temp2 = df1.groupby(['Incident_Type', 'Congestion']).size().unstack()
temp2['sum'] = temp2.sum(axis=1)

plt.rcParams["figure.figsize"] = (10,8)
plt.rcParams.update({'font.size': 16})

cmp = mpl.colors.ListedColormap(['#efe645', '#e1562c', '#537eff'])

# Sort once (ascending row total) and reuse the result for the bars and for
# every label, instead of re-sorting inside the loops.
ranked = temp2.sort_values(by='sum')
grand_total = temp2['sum'].sum()

ax = ranked.iloc[:, :-1].plot(kind='barh', stacked=True, width=0.7, cmap=cmp)
ax.set_xlim(right=200)
ax.set_ylabel('Incident Type', size=18)
ax.set_xlabel('Frequency', size=18)
ax.legend(title='')

# Row totals to the right of each bar. The loop variable is deliberately not
# called `v`, which is the name of the traffic DataFrame used later on.
for row, row_total in enumerate(ranked['sum']):
    ax.text(row_total + 12, row,
            str(int(row_total)) + '\n(' + str(np.round(row_total/grand_total*100, 1)) + '%)',
            va = 'center', ha = 'center', fontsize = 14)

# Segment labels, centred in each stacked segment; segments with a count of
# 4 or less (or no count at all) are left unlabelled.
right_edges = ranked.cumsum(1)
for col in ranked.columns[:-1]:
    for row, (cs, ab) in enumerate(zip(right_edges[col], ranked[col])):
        if not np.isnan(cs) and ab > 4:
            ax.text(cs - ab / 2, row,
                    str(int(ab)) + '\n(' + str(np.round(ab/grand_total*100, 1)) + '%)',
                    va = 'center', ha = 'center', rotation = 20, fontsize = 12)

plt.savefig('fig/incident_congestion_2.png', bbox_inches="tight")
plt.show()

In [None]:
# Merge the side-specific traffic conditions into side-independent groups.
condition_groups = (
    (['Left Emergency Lane Blocked',
      'Right Emergency Lane Blocked'],
     'Emergency Lane Blocked'),
    (['Left Lane(s) Blocked',
      'Right Lane(s) Blocked',
      'Centre Lane(s) Blocked',
      'Left Centre Lane(s) Blocked',
      'Right Centre Lane(s) Blocked',
      'Bus Lane Blocked'],
     'Lane(s) Blocked'),
    (['Left Turning Pocket Blocked',
      'Right Turning Pocket Blocked'],
     'Turning Pocket Blocked'),
)
for detailed_labels, group_label in condition_groups:
    df1['TrafficCondition'] = df1['TrafficCondition'].replace(detailed_labels,
                                                              group_label)

In [None]:
# Stacked horizontal bars: incidents per type, split by traffic condition.
temp2 = df1.groupby(['Incident_Type', 'TrafficCondition']).size().unstack()
temp2['sum'] = temp2.sum(axis=1)

plt.rcParams["figure.figsize"] = (10,8)
plt.rcParams.update({'font.size': 16})

cmp = mpl.colors.ListedColormap(['#00cb85', '#efe645', '#537eff',
                                '#e1562c', '#00e3ff'])

# Sort once (ascending row total) and reuse the result for the bars and for
# every label, instead of re-sorting inside the loops.
ranked = temp2.sort_values(by='sum')
grand_total = temp2['sum'].sum()

ax = ranked.iloc[:, :-1].plot(kind='barh', stacked=True, width=0.7, cmap=cmp)
ax.set_xlim(right=200)
ax.set_ylabel('Incident Type', size=18)
ax.set_xlabel('Frequency', size=18)
ax.legend(title='Traffic Condition')

# Row totals to the right of each bar. The loop variable is deliberately not
# called `v`, which is the name of the traffic DataFrame used later on.
for row, row_total in enumerate(ranked['sum']):
    ax.text(row_total + 12, row,
            str(int(row_total)) + '\n(' + str(np.round(row_total/grand_total*100, 1)) + '%)',
            va = 'center', ha = 'center', fontsize = 14)

# Segment labels, centred in each stacked segment; segments with a count of
# 6 or less (or no count at all) are left unlabelled.
right_edges = ranked.cumsum(1)
for col in ranked.columns[:-1]:
    for row, (cs, ab) in enumerate(zip(right_edges[col], ranked[col])):
        if not np.isnan(cs) and ab > 6:
            ax.text(cs - ab / 2, row,
                    str(int(ab)) + '\n(' + str(np.round(ab/grand_total*100, 1)) + '%)',
                    va = 'center', ha = 'center', rotation = 20, fontsize = 12)

#plt.savefig('fig/incident_condition_2.png', bbox_inches="tight")
plt.show()

In [None]:
# Stacked horizontal bars: incidents per traffic condition, split by the
# 'Congestion' column.
temp2 = df1.groupby(['TrafficCondition', 'Congestion']).size().unstack()
temp2['sum'] = temp2.sum(axis=1)

plt.rcParams["figure.figsize"] = (10,8)
plt.rcParams.update({'font.size': 16})

cmp = mpl.colors.ListedColormap(['#efe645', '#e1562c', '#537eff'])

# Sort once (ascending row total) and reuse the result for the bars and for
# every label, instead of re-sorting inside the loops.
ranked = temp2.sort_values(by='sum')
grand_total = temp2['sum'].sum()

ax = ranked.iloc[:, :-1].plot(kind='barh', stacked=True, width=0.7, cmap=cmp)
ax.set_xlim(right=200)
ax.set_ylabel('Traffic Condition', size=18)
ax.set_xlabel('Frequency', size=18)
ax.legend(title='')

# Row totals to the right of each bar. The loop variable is deliberately not
# called `v`, which is the name of the traffic DataFrame used later on.
for row, row_total in enumerate(ranked['sum']):
    ax.text(row_total + 12, row,
            str(int(row_total)) + '\n(' + str(np.round(row_total/grand_total*100, 1)) + '%)',
            va = 'center', ha = 'center', fontsize = 14)

# Segment labels, centred in each stacked segment; segments with a count of
# 6 or less (or no count at all) are left unlabelled.
right_edges = ranked.cumsum(1)
for col in ranked.columns[:-1]:
    for row, (cs, ab) in enumerate(zip(right_edges[col], ranked[col])):
        if not np.isnan(cs) and ab > 6:
            ax.text(cs - ab / 2, row,
                    str(int(ab)) + '\n(' + str(np.round(ab/grand_total*100, 1)) + '%)',
                    va = 'center', ha = 'center', rotation = 20, fontsize = 12)

#plt.savefig('fig/condition_congestion.png', bbox_inches="tight")
plt.show()

In [None]:
def func2(a):
    """Return the link number, as a string, for latitude ``a``.

    The links are consecutive latitude bands. Band ``k`` starts at the k-th
    lower edge listed below (inclusive) and ends at the next edge
    (exclusive); band "14" has no upper limit. Anything that is not at or
    above the first edge (smaller values, NaN) yields "Other".
    """
    lower_edges = (
        -32.091154, -32.080696, -32.074042, -32.071075, -32.057092,
        -32.052286, -32.043637, -32.040758, -32.030254, -32.012242,
        -32.010690, -32.003147, -31.969905, -31.966753,
    )
    # The edges are strictly ascending, so the number of edges at or below
    # `a` is exactly the index of the band that contains it.
    link = sum(1 for edge in lower_edges if a >= edge)
    return str(link) if link else "Other"

# Derive the link number of every incident from its latitude.
df1['ID'] = df1['Lat'].map(func2)
# NOTE(review): the integer cast raises if func2 returned "Other" for any row.
df1['ID'] = df1['ID'].astype(int)
df1.head()

In [None]:
# Summary statistics of the incident table's numeric columns.
df1.describe()

In [None]:
# Stacked horizontal bars: incidents per link, split by incident type.
temp2 = df1.groupby(['ID', 'Incident_Type']).size().unstack()
temp2['sum'] = temp2.sum(axis=1)

plt.rcParams["figure.figsize"] = (10,10)
plt.rcParams.update({'font.size': 16})

cmp = mpl.colors.ListedColormap(['#4053d3', '#00b25d', '#ddb310', '#b51d14'])

# Sort once (ascending row total) and reuse the result for the bars and for
# every label, instead of re-sorting inside the loops.
ranked = temp2.sort_values(by='sum')
grand_total = temp2['sum'].sum()

ax = ranked.iloc[:, :-1].plot(kind='barh', stacked=True, width=0.7, cmap=cmp)
ax.set_xlim(right=90)
ax.set_ylabel('Link', size=18)
ax.set_xlabel('Frequency', size=18)
ax.legend(title='Incident Type')

# Row totals to the right of each bar. The loop variable is deliberately not
# called `v`, which is the name of the traffic DataFrame used later on.
for row, row_total in enumerate(ranked['sum']):
    ax.text(row_total + 5, row,
            str(int(row_total)) + '\n(' + str(np.round(row_total/grand_total*100, 1)) + '%)',
            va = 'center', ha = 'center', fontsize = 12)

# Segment labels, centred in each stacked segment; segments with a count
# below 5 (or no count at all) are left unlabelled.
right_edges = ranked.cumsum(1)
for col in ranked.columns[:-1]:
    for row, (cs, ab) in enumerate(zip(right_edges[col], ranked[col])):
        if not np.isnan(cs) and ab >= 5:
            ax.text(cs - ab / 2, row-.01,
                    str(int(ab)) + '\n(' + str(np.round(ab/grand_total*100, 1)) + '%)',
                    va = 'center', ha = 'center', rotation=15, fontsize = 11)

#plt.savefig('fig/incident_link_2.png', bbox_inches="tight")
plt.show()

In [None]:
# Remove the temporary cross-tabulation used by the bar charts above.
del temp2

In [None]:
import matplotlib.cm as cm
import matplotlib.colors as mcolors

def colorbar_index(ncolors, cmap):
    """Attach a discrete colour bar labelled 1..ncolors to the current Axes.

    ncolors: number of colour steps (and tick labels) on the bar.
    cmap: colormap instance or name; it is discretised with cmap_discretize.

    The bar is titled 'Link'. Nothing is returned.
    """
    cmap = cmap_discretize(cmap, ncolors)
    # Stand-alone mappable: the colour bar is not tied to any plotted artist.
    mappable = cm.ScalarMappable(cmap=cmap)
    mappable.set_array([])
    mappable.set_clim(-0.5, ncolors+0.5)
    # The mappable belongs to no Axes, so the Axes to take space from must be
    # given explicitly; newer Matplotlib releases refuse to guess it.
    colorbar = plt.colorbar(mappable, ax=plt.gca())
    colorbar.set_ticks(np.linspace(0, ncolors, ncolors))
    colorbar.set_ticklabels(range(1, ncolors+1))
    colorbar.set_label('Link')
    
def cmap_discretize(cmap, N):
    """Return a discrete colormap from the continuous colormap cmap.

        cmap: colormap instance, eg. cm.jet, or the name of a registered
              colormap.
        N: number of colors.

    Returns a LinearSegmentedColormap named "<cmap.name>_<N>" built from the
    red, green and blue channels of cmap sampled at N evenly spaced points.

    Example
        x = resize(arange(100), (5,100))
        djet = cmap_discretize(cm.jet, 5)
        imshow(x, cmap=djet)
    """

    # isinstance (rather than an exact type comparison) also accepts str
    # subclasses as colormap names.
    if isinstance(cmap, str):
        cmap = plt.get_cmap(cmap)
    # N sample positions plus four zeros; the trailing zeros keep the
    # lookups at index N (and at index -1 for i == 0) within bounds.
    colors_i = np.concatenate((np.linspace(0, 1., N), (0.,0.,0.,0.)))
    colors_rgba = cmap(colors_i)
    indices = np.linspace(0, 1., N+1)
    cdict = {}
    for ki,key in enumerate(('red','green','blue')):
        # Each breakpoint jumps from the previous sample to the next one,
        # which yields flat colour steps instead of a gradient.
        cdict[key] = [ (indices[i], colors_rgba[i-1,ki], colors_rgba[i,ki])
                       for i in range(N+1) ]
    # Return colormap object.
    return mcolors.LinearSegmentedColormap(cmap.name + "_%d"%N, cdict, 1024)


# Scatter of incident locations: marker size taken from the 'Duration'
# column, marker colour from the link ID, plus a discrete 'Link' colour bar.
link_colours = [
    '#ebac23', '#b80058', '#008cf9',
    '#006e00', '#00bbad', '#d163e6',
    '#b24502', '#ff9287', '#5954d6',
    '#00c6f8', '#878500', '#00a76c',
    '#bdbdbd', '#000078', '#b51d14',
]
cmp = mpl.colors.ListedColormap(link_colours)

df1.plot.scatter(x='Long', y='Lat',
                 s='Duration', c='ID', cmap=cmp,
                 alpha=0.5, label='Duration',
                 colorbar=False, rot=45)
# Print full coordinate values on the axes instead of an offset notation.
plt.ticklabel_format(useOffset=False)
plt.ylabel('Latitude')
plt.xlabel('Longitude')
plt.legend(markerscale=0.2)
colorbar_index(ncolors=14, cmap=cmp)
#plt.savefig('fig/incident_loc_link_2.png', bbox_inches="tight")
plt.show()

## Traffic + Incident data

In [None]:
# Load the merged traffic + incident table (first CSV column is the index)
# and parse its timestamp column.
df = (
    pd.read_csv('data/clean/LAD+incident2.csv', index_col=0)
    .assign(DateTime=lambda d: pd.to_datetime(d['DateTime']))
)
df.info()

In [None]:
# Summary statistics of the columns at positions 3-5, shown with four decimals
four_decimals = '{0:.4f}'.format
traffic_summary = df.iloc[:, 3:6].describe()
traffic_summary.apply(lambda column: column.apply(four_decimals))

In [None]:
# Pairwise correlation of the columns at positions 2-5.
# NOTE(review): presumably these include volume, speed and occupancy — confirm the column order.
df.iloc[:,2:6].corr()

- Moderate positive correlation between volume and occupancy
- High negative correlation between speed and occupancy

### 27 Aug 18 (8.30 - 13.30)

In [None]:
# Traffic measurements of all links on 27 Aug 2018, 08:30-13:44.
window_start = pd.Timestamp('2018-08-27 08:30:00')
in_window = ((df.DateTime >= '2018-08-27 08:30:00') &
             (df.DateTime <= '2018-08-27 13:44:00'))
# .copy() makes `v` an independent frame, so the column assignments below do
# not write into a slice of `df`.
v = df.loc[in_window, ['ID', 'DateTime', 'Volume', 'Speed', 'Occupancy']].copy()
v['ID'] = v['ID'].astype(int)
# Minutes elapsed since the start of the window.
v['Minutes'] = (v['DateTime'] - window_start).dt.total_seconds()/60
# NOTE(review): occupancy-to-density conversion factor 4000/700 — confirm its derivation.
v['Density'] = v['Occupancy']*4000/700
# 'Minutes' and 'Occupancy' are intentionally kept in `v`: the 3D-plot and
# heat-map cells of this section read them, and dropping them here made those
# cells fail with a KeyError on a clean run.

# One wide table per measure: rows are timestamps, columns are link IDs.
piv = pd.pivot_table(v[['ID', 'DateTime', 'Volume']], index='DateTime', columns='ID', values='Volume')
pivs = pd.pivot_table(v[['ID', 'DateTime', 'Speed']], index='DateTime', columns='ID', values='Speed')
pivd = pd.pivot_table(v[['ID', 'DateTime', 'Density']], index='DateTime', columns='ID', values='Density')

In [None]:
# Export 15-minute aggregates: summed volume, mean speed, mean density.
# '15min' replaces the deprecated '15T' frequency alias.
piv.resample('15min').sum().to_csv('out_data/27Aug_vol15min.csv')
pivs.resample('15min').mean().round(4).to_csv('out_data/27Aug_speed15min.csv')
# NOTE(review): the integer cast raises if any 15-minute mean is NaN.
pivd.resample('15min').mean().astype(int).to_csv('out_data/27Aug_density15min.csv')

#### 3D Plot 1

In [None]:
# 3D surface of volume over link and elapsed minutes (view 1)
from mpl_toolkits.mplot3d import Axes3D

plt.rcParams['font.size'] = 14
link_labels = [str(link) for link in range(1, 15)]
time_labels = ['8:30', '9:30', '10:30', '11:30', '12:30', '13:30']

fig = plt.figure(figsize=(10, 8))
ax = plt.axes(projection='3d')
surf = ax.plot_trisurf(v['ID'], v['Minutes'], v['Volume'], cmap='jet')

ax.set_xticks(np.arange(1, 15, 1), link_labels)
ax.set_yticks(np.arange(0, 301, 60), time_labels)
ax.set_xlabel('\nLink', fontsize=16, linespacing=1.5)
ax.set_ylabel('\nTime (HH:MM)', fontsize=16, linespacing=1.5)
ax.set_zlabel('Volume (veh/min)', fontsize=16)
ax.invert_xaxis()

# Scale the x and y directions of the projection by 1.2 and 0.8.
axis_scale = np.diag([1.2, 0.8, 1, 1])
ax.get_proj = lambda: Axes3D.get_proj(ax) @ axis_scale

ax.view_init(30, 60)
fig.colorbar(surf, shrink=.45, aspect=10, anchor=(-0.1, 0.55))
#plt.savefig('fig/vol_3d_1.png', bbox_inches="tight")
plt.show()

In [None]:
# 3D surface of speed over link and elapsed minutes (view 1)
from mpl_toolkits.mplot3d import Axes3D

plt.rcParams['font.size'] = 14
link_labels = [str(link) for link in range(1, 15)]
time_labels = ['8:30', '9:30', '10:30', '11:30', '12:30', '13:30']

fig = plt.figure(figsize=(10, 8))
ax = plt.axes(projection='3d')
surf = ax.plot_trisurf(v['ID'], v['Minutes'], v['Speed'], cmap='jet')

ax.set_xticks(np.arange(1, 15, 1), link_labels)
ax.set_yticks(np.arange(0, 301, 60), time_labels)
ax.set_xlabel('\nLink', fontsize=16, linespacing=1.5)
ax.set_ylabel('\nTime (HH:MM)', fontsize=16)
ax.set_zlabel('Speed (km/hr)', fontsize=16)
ax.invert_xaxis()

# Scale the x and y directions of the projection by 1.2 and 0.8.
axis_scale = np.diag([1.2, 0.8, 1, 1])
ax.get_proj = lambda: Axes3D.get_proj(ax) @ axis_scale

ax.view_init(30, 60)
fig.colorbar(surf, shrink=.6, aspect=10, anchor=(-0.1, 0.55))
#plt.savefig('fig/speed_3d_1.png', bbox_inches="tight")
plt.show()

In [None]:
# 3D plot Occupancy 1
# Surface of occupancy (divided by 100) over link and elapsed minutes.
from mpl_toolkits.mplot3d import Axes3D
plt.rcParams.update({'font.size': 14})
fig = plt.figure(figsize=(10,8))
ax = plt.axes(projection='3d')
surf = ax.plot_trisurf(v['ID'], v['Minutes'], v['Occupancy']/100, cmap='jet')
ax.set_yticks(np.arange(0,301,60),
             ['8:30', '9:30', '10:30', '11:30', '12:30', '13:30'])
ax.set_xticks(np.arange(1,15,1),
              ['1', '2', '3', '4', '5', '6', '7',
               '8', '9', '10', '11', '12', '13', '14'])
ax.set_ylabel('\nTime (HH:MM)', fontsize=16)
ax.set_xlabel('\nLink', fontsize=16, linespacing=1.5)
# Backslashes are doubled: '\c' is not a valid escape sequence, and the
# label must still start with a real newline, so a raw string cannot be used.
ax.set_zlabel('\nOccupancy (veh$\\cdot$km$\\cdot$100/area)', fontsize=16)
ax.invert_xaxis()

# Scale the x and y directions of the projection by 1.2 and 0.8.
ax.get_proj = lambda: np.dot(Axes3D.get_proj(ax),
                             np.diag([1.2, 0.8, 1, 1]))

ax.view_init(30, 60)
fig.colorbar(surf, shrink=.45, aspect=10, anchor=(-0.1,0.55))
#plt.savefig('fig/occupancy_3d_1.png', bbox_inches="tight")
plt.show()

In [None]:
# 3D surface of density (occupancy / 700 * 4000) over link and elapsed minutes (view 1)
from mpl_toolkits.mplot3d import Axes3D

plt.rcParams['font.size'] = 14
link_labels = [str(link) for link in range(1, 15)]
time_labels = ['8:30', '9:30', '10:30', '11:30', '12:30', '13:30']

fig = plt.figure(figsize=(10, 8))
ax = plt.axes(projection='3d')
density = v['Occupancy']/700*4000
surf = ax.plot_trisurf(v['ID'], v['Minutes'], density, cmap='jet')

ax.set_xticks(np.arange(1, 15, 1), link_labels)
ax.set_yticks(np.arange(0, 301, 60), time_labels)
ax.set_xlabel('\nLink', fontsize=16, linespacing=1.5)
ax.set_ylabel('\nTime (HH:MM)', fontsize=16)
ax.set_zlabel('\nDensity (veh/km)', fontsize=16)
ax.invert_xaxis()

# Scale the x and y directions of the projection by 1.2 and 0.8.
axis_scale = np.diag([1.2, 0.8, 1, 1])
ax.get_proj = lambda: Axes3D.get_proj(ax) @ axis_scale

ax.view_init(30, 60)
fig.colorbar(surf, shrink=.45, aspect=10, anchor=(-0.1, 0.55))
#plt.savefig('fig/density_3d_1.png', bbox_inches="tight")
plt.show()

#### 3D Plot 2

In [None]:
# 3D surface of volume over link and elapsed minutes (view 2: x axis not inverted)
from mpl_toolkits.mplot3d import Axes3D

plt.rcParams['font.size'] = 14
link_labels = [str(link) for link in range(1, 15)]
time_labels = ['8:30', '9:30', '10:30', '11:30', '12:30', '13:30']

fig = plt.figure(figsize=(10, 8))
ax = plt.axes(projection='3d')
surf = ax.plot_trisurf(v['ID'], v['Minutes'], v['Volume'], cmap='jet')

ax.set_xticks(np.arange(1, 15, 1), link_labels)
ax.set_yticks(np.arange(0, 301, 60), time_labels)
ax.set_xlabel('\nLink', fontsize=16, linespacing=1.5)
ax.set_ylabel('\nTime (HH:MM)', fontsize=16, linespacing=1.5)
ax.set_zlabel('Volume (veh/min)', fontsize=16)

# Scale the x and y directions of the projection by 1.2 and 0.8.
axis_scale = np.diag([1.2, 0.8, 1, 1])
ax.get_proj = lambda: Axes3D.get_proj(ax) @ axis_scale

ax.view_init(25, 45)
fig.colorbar(surf, shrink=.45, aspect=8, anchor=(-1.2, 0.5))
#plt.savefig('fig/vol_3d_2.png', bbox_inches="tight")
plt.show()

In [None]:
# 3D surface of speed over link and elapsed minutes (view 2: x axis not inverted)
plt.rcParams['font.size'] = 14
from mpl_toolkits.mplot3d import Axes3D

link_labels = [str(link) for link in range(1, 15)]
time_labels = ['8:30', '9:30', '10:30', '11:30', '12:30', '13:30']

fig = plt.figure(figsize=(10, 8))
ax = plt.axes(projection='3d')
surf = ax.plot_trisurf(v['ID'], v['Minutes'], v['Speed'], cmap='jet')

ax.set_xticks(np.arange(1, 15, 1), link_labels)
ax.set_yticks(np.arange(0, 301, 60), time_labels)
ax.set_xlabel('\nLink', fontsize=16, linespacing=1.5)
ax.set_ylabel('\nTime (HH:MM)', fontsize=16, linespacing=1.5)
ax.set_zlabel('Speed (km/hr)', fontsize=16)

# Scale the x and y directions of the projection by 1.2 and 0.8.
axis_scale = np.diag([1.2, 0.8, 1, 1])
ax.get_proj = lambda: Axes3D.get_proj(ax) @ axis_scale

ax.view_init(25, 45)
fig.colorbar(surf, shrink=.55, aspect=8, anchor=(-1.2, 0.5))
#plt.savefig('fig/speed_3d_2.png', bbox_inches="tight")
plt.show()

In [None]:
# 3D plot Occupancy 2
# Surface of occupancy (divided by 100) over link and elapsed minutes,
# second viewing angle (x axis not inverted).
plt.rcParams.update({'font.size': 14})
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure(figsize=(10,8))
ax = plt.axes(projection='3d')
surf = ax.plot_trisurf(v['ID'], v['Minutes'], v['Occupancy']/100, cmap='jet')
ax.set_yticks(np.arange(0,301,60),
             ['8:30', '9:30', '10:30', '11:30', '12:30', '13:30'])
ax.set_xticks(np.arange(1,15,1),
              ['1', '2', '3', '4', '5', '6', '7',
               '8', '9', '10', '11', '12', '13', '14'])
ax.set_ylabel('\nTime (HH:MM)', fontsize=16, linespacing=1.5)
ax.set_xlabel('\nLink', fontsize=16, linespacing=1.5)
# Backslashes are doubled: '\c' is not a valid escape sequence, and the
# label must still start with a real newline, so a raw string cannot be used.
ax.set_zlabel('\nOccupancy (veh$\\cdot$km$\\cdot$100/area)', fontsize=16)

# Scale the x and y directions of the projection by 1.2 and 0.8.
ax.get_proj = lambda: np.dot(Axes3D.get_proj(ax),
                             np.diag([1.2, 0.8, 1, 1]))

ax.view_init(25, 45)
fig.colorbar(surf, shrink=.5, aspect=8, anchor=(-1,0.5))
#plt.savefig('fig/occupancy_3d_2.png', bbox_inches="tight")
plt.show()

In [None]:
# Volume over time, one line per link
plt.rcParams['font.size'] = 12
fig, ax = plt.subplots(figsize=(12, 6))
link_as_text = v.ID.astype(str)
p = sns.lineplot(data=v, x='DateTime', y='Volume', hue=link_as_text, ax=ax)

# Legend without a title, spread over seven columns.
legend = ax.legend(loc='upper right', ncol=7)
legend.set_title('')

# One major tick at minute 30 of every hour, shown as HH:MM.
ax.xaxis.set_major_locator(mdates.MinuteLocator(byminute=[30]))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
ax.set_xlabel('Time (HH:MM)', fontsize=16)
ax.set_ylabel('Volume (veh/min)', fontsize=16)
ax.grid()
plt.show()

#### Heat map

In [None]:
# Volume heat map: one row per link, one column per timestamp
piv = v.pivot_table(index='DateTime', columns='ID', values='Volume')

link_labels = [str(link) for link in range(1, 15)]
time_labels = ['8:30', '9:30', '10:30', '11:30', '12:30', '13:30']

fig, ax = plt.subplots()
hmap = ax.pcolormesh(piv.T, cmap='jet', vmin=0, vmax=v['Volume'].max())
ax.set_xlabel('Time (HH:MM)', fontsize=14)
ax.set_ylabel('Link', fontsize=14)
ax.set_xticks(np.arange(0, 301, 60), time_labels)
# Shift by half a cell so each label sits in the middle of its row.
ax.set_yticks(np.arange(1, 15, 1) - 0.5, link_labels)
fig.colorbar(hmap, ax=ax)
#plt.savefig('fig/volume_hmap.png', bbox_inches="tight")
plt.show()

In [None]:
# Speed heat map: one row per link, one column per timestamp
piv = v.pivot_table(index='DateTime', columns='ID', values='Speed')

link_labels = [str(link) for link in range(1, 15)]
time_labels = ['8:30', '9:30', '10:30', '11:30', '12:30', '13:30']

fig, ax = plt.subplots()
hmap = ax.pcolormesh(piv.T, cmap='jet', vmin=0, vmax=v['Speed'].max())
ax.set_xlabel('Time (HH:MM)', fontsize=14)
ax.set_ylabel('Link', fontsize=14)
ax.set_xticks(np.arange(0, 301, 60), time_labels)
# Shift by half a cell so each label sits in the middle of its row.
ax.set_yticks(np.arange(1, 15, 1) - 0.5, link_labels)
fig.colorbar(hmap, ax=ax)
#plt.savefig('fig/speed_hmap.png', bbox_inches="tight")
plt.show()

In [None]:
# Density heat map (occupancy / 700 * 4000): one row per link, one column
# per timestamp. The figure is written to fig/density_hmap.png.
piv = v.pivot_table(index='DateTime', columns='ID', values='Occupancy')
piv = piv/700 * 4000

link_labels = [str(link) for link in range(1, 15)]
time_labels = ['8:30', '9:30', '10:30', '11:30', '12:30', '13:30']

fig, ax = plt.subplots()
colour_max = v.Occupancy.max()/700 *4000
hmap = ax.pcolormesh(piv.T, cmap='jet', vmin=0, vmax=colour_max)
ax.set_xlabel('Time (HH:MM)', fontsize=14)
ax.set_ylabel('Link', fontsize=14)
ax.set_xticks(np.arange(0, 301, 60), time_labels)
# Shift by half a cell so each label sits in the middle of its row.
ax.set_yticks(np.arange(1, 15, 1) - 0.5, link_labels)
fig.colorbar(hmap, ax=ax)
plt.savefig('fig/density_hmap.png', bbox_inches="tight")
plt.show()

### 27 Aug 18 (8.30 - 9.30)

In [36]:
# Traffic measurements of all links on 27 Aug 2018, 08:30-09:30 (inclusive).
in_window = df['DateTime'].between('2018-08-27 08:30:00', '2018-08-27 09:30:00')
v = df.loc[in_window, ['ID', 'DateTime', 'Volume', 'Speed', 'Occupancy']].copy()
v['ID'] = v['ID'].astype(int)
v['Density'] = v['Occupancy']*4000/700
# Only these five columns are carried forward from this cell.
v = v[['ID', 'DateTime', 'Volume', 'Speed', 'Density']]

# One wide table per measure: rows are timestamps, columns are link IDs.
piv = v.pivot_table(index='DateTime', columns='ID', values='Volume')
pivs = v.pivot_table(index='DateTime', columns='ID', values='Speed')
pivd = v.pivot_table(index='DateTime', columns='ID', values='Density')

In [37]:
# Write the three wide tables at their native time resolution.
exports = (
    (piv, 'out_data/27Aug_1min_volume.csv'),
    (pivs, 'out_data/27Aug_1min_speed.csv'),
    (pivd, 'out_data/27Aug_1min_density.csv'),
)
for table, csv_path in exports:
    table.to_csv(csv_path)

### 3 Sep 18 (8.30 - 13.30) no incidents

In [None]:
# Traffic measurements of all links on 3 Sep 2018, 08:30-13:44 (inclusive).
in_window = df['DateTime'].between('2018-09-03 08:30:00', '2018-09-03 13:44:00')
v = df.loc[in_window, ['ID', 'DateTime', 'Volume', 'Speed', 'Occupancy']].copy()
v['ID'] = v['ID'].astype(int)
v['Density'] = v['Occupancy']*4000/700
# Only these five columns are carried forward from this cell.
v = v[['ID', 'DateTime', 'Volume', 'Speed', 'Density']]

# One wide table per measure: rows are timestamps, columns are link IDs.
piv = v.pivot_table(index='DateTime', columns='ID', values='Volume')
pivs = v.pivot_table(index='DateTime', columns='ID', values='Speed')
pivd = v.pivot_table(index='DateTime', columns='ID', values='Density')

In [None]:
# Export 15-minute aggregates: summed volume, mean speed, mean density.
# '15min' replaces the deprecated '15T' frequency alias.
piv.resample('15min').sum().to_csv('out_data/3Sep_vol15min.csv')
pivs.resample('15min').mean().round(4).to_csv('out_data/3Sep_speed15min.csv')
# NOTE(review): the integer cast raises if any 15-minute mean is NaN.
pivd.resample('15min').mean().astype(int).to_csv('out_data/3Sep_density15min.csv')

#### 3D Plot

In [None]:
# Plot data for 3 Sep 2018, 08:30-13:30 (inclusive); every helper column is kept.
in_window = df['DateTime'].between('2018-09-03 08:30:00', '2018-09-03 13:30:00')
v = df.loc[in_window, ['ID', 'DateTime', 'Volume', 'Speed', 'Occupancy']].copy()
v['ID'] = v['ID'].astype(int)
# Window start repeated on every row, then parsed to timestamps.
v['Start'] = pd.to_datetime(pd.Series('2018-09-03 08:30:00', index=v.index))
# Minutes elapsed since the start of the window.
elapsed = v['DateTime'] - v['Start']
v['Minutes'] = elapsed.dt.total_seconds()/60
v['Density'] = v['Occupancy']*4000/700

In [None]:
# 3D surface of volume over link and elapsed minutes
from mpl_toolkits.mplot3d import Axes3D

plt.rcParams['font.size'] = 14
link_labels = [str(link) for link in range(1, 15)]
time_labels = ['8:30', '9:30', '10:30', '11:30', '12:30', '13:30']

fig = plt.figure(figsize=(10, 8))
ax = plt.axes(projection='3d')
surf = ax.plot_trisurf(v['ID'], v['Minutes'], v['Volume'], cmap='jet')

ax.set_xticks(np.arange(1, 15, 1), link_labels)
ax.set_yticks(np.arange(0, 301, 60), time_labels)
ax.set_xlabel('\nLink', fontsize=16, linespacing=1.5)
ax.set_ylabel('\nTime (HH:MM)', fontsize=16, linespacing=1.5)
ax.set_zlabel('Volume (veh/min)', fontsize=16)
ax.invert_xaxis()

# Scale the x and y directions of the projection by 1.2 and 0.8.
axis_scale = np.diag([1.2, 0.8, 1, 1])
ax.get_proj = lambda: Axes3D.get_proj(ax) @ axis_scale

ax.view_init(30, 60)
fig.colorbar(surf, shrink=.45, aspect=10, anchor=(-0.1, 0.55))
#plt.savefig('fig/3Sep/3Sep_3D_Volume.png', bbox_inches="tight")
plt.show()

In [None]:
# 3D surface of speed over link and elapsed minutes
from mpl_toolkits.mplot3d import Axes3D

plt.rcParams['font.size'] = 14
link_labels = [str(link) for link in range(1, 15)]
time_labels = ['8:30', '9:30', '10:30', '11:30', '12:30', '13:30']

fig = plt.figure(figsize=(10, 8))
ax = plt.axes(projection='3d')
surf = ax.plot_trisurf(v['ID'], v['Minutes'], v['Speed'], cmap='jet')

ax.set_xticks(np.arange(1, 15, 1), link_labels)
ax.set_yticks(np.arange(0, 301, 60), time_labels)
ax.set_xlabel('\nLink', fontsize=16, linespacing=1.5)
ax.set_ylabel('\nTime (HH:MM)', fontsize=16, linespacing=1.5)
ax.set_zlabel('Speed (km/hr)', fontsize=16)
ax.invert_xaxis()

# Scale the x and y directions of the projection by 1.2 and 0.8.
axis_scale = np.diag([1.2, 0.8, 1, 1])
ax.get_proj = lambda: Axes3D.get_proj(ax) @ axis_scale

ax.view_init(30, 60)
fig.colorbar(surf, shrink=.6, aspect=10, anchor=(-0.1, 0.55))
#plt.savefig('fig/3Sep/3Sep_3D_Speed.png', bbox_inches="tight")
plt.show()

In [None]:
# 3D plot Occupancy
# Surface of occupancy (divided by 100) over link and elapsed minutes, with
# fixed z-axis and colour limits.
from mpl_toolkits.mplot3d import Axes3D
plt.rcParams.update({'font.size': 14})
fig = plt.figure(figsize=(10,8))
ax = plt.axes(projection='3d')
surf = ax.plot_trisurf(v['ID'], v['Minutes'], v['Occupancy']/100, cmap='jet')
ax.set_zlim(0, 0.45)
ax.set_yticks(np.arange(0,301,60),
             ['8:30', '9:30', '10:30', '11:30', '12:30', '13:30'])
ax.set_xticks(np.arange(1,15,1),
              ['1', '2', '3', '4', '5', '6', '7',
               '8', '9', '10', '11', '12', '13', '14'])
ax.set_ylabel('\nTime (HH:MM)', fontsize=16)
ax.set_xlabel('\nLink', fontsize=16, linespacing=1.5)
# Backslashes are doubled: '\c' is not a valid escape sequence, and the
# label must still start with a real newline, so a raw string cannot be used.
ax.set_zlabel('\nOccupancy (veh$\\cdot$km$\\cdot$100/area)', fontsize=16)
ax.invert_xaxis()

# Scale the x and y directions of the projection by 1.2 and 0.8.
ax.get_proj = lambda: np.dot(Axes3D.get_proj(ax),
                             np.diag([1.2, 0.8, 1, 1]))

ax.view_init(30, 60)
fig.colorbar(surf, shrink=.45, aspect=10, anchor=(-0.1,0.55))
surf.set_clim(0, 0.4)
#plt.savefig('fig/3Sep/3Sep_3D_Occupancy.png', bbox_inches="tight")
plt.show()

In [None]:
# 3D surface of density (occupancy / 700 * 4000) over link and elapsed
# minutes, with fixed z-axis and colour limits
from mpl_toolkits.mplot3d import Axes3D

plt.rcParams['font.size'] = 14
link_labels = [str(link) for link in range(1, 15)]
time_labels = ['8:30', '9:30', '10:30', '11:30', '12:30', '13:30']

fig = plt.figure(figsize=(10, 8))
ax = plt.axes(projection='3d')
density = v['Occupancy']/700*4000
surf = ax.plot_trisurf(v['ID'], v['Minutes'], density, cmap='jet')
ax.set_zlim(0, 250)

ax.set_xticks(np.arange(1, 15, 1), link_labels)
ax.set_yticks(np.arange(0, 301, 60), time_labels)
ax.set_xlabel('\nLink', fontsize=16, linespacing=1.5)
ax.set_ylabel('\nTime (HH:MM)', fontsize=16)
ax.set_zlabel('\nDensity (veh/km)', fontsize=16)
ax.invert_xaxis()

# Scale the x and y directions of the projection by 1.2 and 0.8.
axis_scale = np.diag([1.2, 0.8, 1, 1])
ax.get_proj = lambda: Axes3D.get_proj(ax) @ axis_scale

ax.view_init(30, 60)
fig.colorbar(surf, shrink=.45, aspect=10, anchor=(-0.1, 0.55))
surf.set_clim(0, 240)
#plt.savefig('fig/3Sep/3Sep_3D_Density.png', bbox_inches="tight")
plt.show()

#### Heatmap

In [None]:
# Volume heat map drawn from the existing `piv` table (links as rows)
link_labels = [str(link) for link in range(1, 15)]
time_labels = ['8:30', '9:30', '10:30', '11:30', '12:30', '13:30']

fig, ax = plt.subplots()
hmap = ax.pcolormesh(piv.T, cmap='jet', vmin=0, vmax=v['Volume'].max())
ax.set_xlabel('Time (HH:MM)', fontsize=14)
ax.set_ylabel('Link', fontsize=14)
ax.set_xticks(np.arange(0, 301, 60), time_labels)
# Shift by half a cell so each label sits in the middle of its row.
ax.set_yticks(np.arange(1, 15, 1) - 0.5, link_labels)
fig.colorbar(hmap, ax=ax)
#plt.savefig('fig/3Sep/3Sep_Heatmap_Volume.png', bbox_inches="tight")
plt.show()

In [None]:
# Speed heat map drawn from the existing `pivs` table (links as rows)
link_labels = [str(link) for link in range(1, 15)]
time_labels = ['8:30', '9:30', '10:30', '11:30', '12:30', '13:30']

fig, ax = plt.subplots()
hmap = ax.pcolormesh(pivs.T, cmap='jet', vmin=0, vmax=v['Speed'].max())
ax.set_xlabel('Time (HH:MM)', fontsize=14)
ax.set_ylabel('Link', fontsize=14)
ax.set_xticks(np.arange(0, 301, 60), time_labels)
# Shift by half a cell so each label sits in the middle of its row.
ax.set_yticks(np.arange(1, 15, 1) - 0.5, link_labels)
fig.colorbar(hmap, ax=ax)
#plt.savefig('fig/3Sep/3Sep_Heatmap_Speed.png', bbox_inches="tight")
plt.show()

In [None]:
# Density heat map drawn from the existing `pivd` table (links as rows),
# with the colour scale fixed to 0-258

link_labels = [str(link) for link in range(1, 15)]
time_labels = ['8:30', '9:30', '10:30', '11:30', '12:30', '13:30']

fig, ax = plt.subplots()
hmap = ax.pcolormesh(pivd.T, cmap='jet', vmin=0, vmax=258)
ax.set_xlabel('Time (HH:MM)', fontsize=14)
ax.set_ylabel('Link', fontsize=14)
ax.set_xticks(np.arange(0, 301, 60), time_labels)
# Shift by half a cell so each label sits in the middle of its row.
ax.set_yticks(np.arange(1, 15, 1) - 0.5, link_labels)
fig.colorbar(hmap, ax=ax)
#plt.savefig('fig/3Sep/3Sep_Heatmap_Density.png', bbox_inches="tight")
plt.show()

In [None]:
# Occupancy heat map (occupancy / 100): one row per link, one column per
# timestamp, colour scale fixed to 0-0.45
pivo = v.pivot_table(index='DateTime', columns='ID', values='Occupancy')
pivo = pivo/100

link_labels = [str(link) for link in range(1, 15)]
time_labels = ['8:30', '9:30', '10:30', '11:30', '12:30', '13:30']

fig, ax = plt.subplots()
hmap = ax.pcolormesh(pivo.T, cmap='jet', vmin=0, vmax=0.45)
ax.set_xlabel('Time (HH:MM)', fontsize=14)
ax.set_ylabel('Link', fontsize=14)
ax.set_xticks(np.arange(0, 301, 60), time_labels)
# Shift by half a cell so each label sits in the middle of its row.
ax.set_yticks(np.arange(1, 15, 1) - 0.5, link_labels)
fig.colorbar(hmap, ax=ax)
#plt.savefig('fig/3Sep/3Sep_Heatmap_Occupancy.png', bbox_inches="tight")
plt.show()

### 3 Sep 18 (8.30 - 9.30)

In [33]:
# Traffic measurements of all links on 3 Sep 2018, 08:30-09:30.
window_start = pd.Timestamp('2018-09-03 08:30:00')
in_window = ((df.DateTime >= '2018-09-03 08:30:00') &
             (df.DateTime <= '2018-09-03 09:30:00'))
# .copy() makes `v` an independent frame, so the column assignments below do
# not write into a slice of `df`.
v = df.loc[in_window, ['ID', 'DateTime', 'Volume', 'Speed', 'Occupancy']].copy()
v['ID'] = v['ID'].astype(int)
# Minutes elapsed since the start of the window.
v['Minutes'] = (v['DateTime'] - window_start).dt.total_seconds()/60
# NOTE(review): occupancy-to-density conversion factor 4000/700 — confirm its derivation.
v['Density'] = v['Occupancy']*4000/700

# 'Minutes' and 'Occupancy' are intentionally kept in `v`: the 3D-plot cells
# of this section read v['Minutes'], and dropping it here made them fail with
# a KeyError on a clean run.

# One wide table per measure: rows are timestamps, columns are link IDs.
piv = pd.pivot_table(v[['ID', 'DateTime', 'Volume']], index='DateTime', columns='ID', values='Volume')
pivs = pd.pivot_table(v[['ID', 'DateTime', 'Speed']], index='DateTime', columns='ID', values='Speed')
pivd = pd.pivot_table(v[['ID', 'DateTime', 'Density']], index='DateTime', columns='ID', values='Density')

In [35]:
# Write the three wide tables at their native time resolution.
exports = (
    (piv, 'out_data/3Sep_1min_volume.csv'),
    (pivs, 'out_data/3Sep_1min_speed.csv'),
    (pivd, 'out_data/3Sep_1min_density.csv'),
)
for table, csv_path in exports:
    table.to_csv(csv_path)

In [None]:
# Export 15-minute aggregates: summed volume, mean speed, mean density.
# '15min' replaces the deprecated '15T' frequency alias.
piv.resample('15min').sum().to_csv('out_data/3Sep_15min_vol.csv')
pivs.resample('15min').mean().round(4).to_csv('out_data/3Sep_15min_speed.csv')
# NOTE(review): the integer cast raises if any 15-minute mean is NaN.
pivd.resample('15min').mean().astype(int).to_csv('out_data/3Sep_15min_density.csv')

#### 3D Plot

In [None]:
# 3D surface of volume over link and elapsed minutes (one-hour window)
from mpl_toolkits.mplot3d import Axes3D

plt.rcParams['font.size'] = 14
link_labels = [str(link) for link in range(1, 15)]
time_labels = ['8:30', '8:40', '8:50', '9:00', '9:10', '9:20', '9:30']

fig = plt.figure(figsize=(10, 8))
ax = plt.axes(projection='3d')
surf = ax.plot_trisurf(v['ID'], v['Minutes'], v['Volume'], cmap='jet')

ax.set_xticks(np.arange(1, 15, 1), link_labels)
ax.set_yticks(np.arange(0, 61, 10), time_labels)
ax.set_xlabel('\nLink', fontsize=16, linespacing=1.5)
ax.set_ylabel('\nTime (HH:MM)', fontsize=16, linespacing=1.5)
ax.set_zlabel('Volume (veh/min)', fontsize=16)
ax.invert_xaxis()

# Scale the x and y directions of the projection by 1.2 and 0.8.
axis_scale = np.diag([1.2, 0.8, 1, 1])
ax.get_proj = lambda: Axes3D.get_proj(ax) @ axis_scale

ax.view_init(30, 60)
fig.colorbar(surf, shrink=.45, aspect=10, anchor=(-0.1, 0.55))
#plt.savefig('fig/3Sep/830-930_3Sep_3D_Volume.png', bbox_inches="tight")
plt.show()

In [None]:
# 3D surface of speed over link (x-axis) and minutes since 8:30 (y-axis)
from mpl_toolkits.mplot3d import Axes3D

plt.rcParams['font.size'] = 14

minute_ticks = np.arange(0, 61, 10)
minute_labels = ['8:30', '8:40', '8:50', '9:00', '9:10', '9:20', '9:30']
link_ticks = np.arange(1, 15, 1)
link_labels = ['1', '2', '3', '4', '5', '6', '7',
               '8', '9', '10', '11', '12', '13', '14']

fig = plt.figure(figsize=(10, 8))
ax = plt.axes(projection='3d')
surf = ax.plot_trisurf(v['ID'], v['Minutes'], v['Speed'], cmap='jet')

ax.set_xticks(link_ticks, link_labels)
ax.set_yticks(minute_ticks, minute_labels)
# The leading newline pushes the label away from the tick labels.
ax.set_xlabel('\nLink', fontsize=16, linespacing=1.5)
ax.set_ylabel('\nTime (HH:MM)', fontsize=16, linespacing=1.5)
ax.set_zlabel('Speed (km/hr)', fontsize=16)
ax.invert_xaxis()

# Rescale the projection: stretch x by 1.2 and shrink y to 0.8.
ax.get_proj = lambda: Axes3D.get_proj(ax) @ np.diag([1.2, 0.8, 1, 1])

ax.view_init(30, 60)
fig.colorbar(surf, shrink=.6, aspect=10, anchor=(-0.1,0.55))
#plt.savefig('fig/3Sep/830-930_3Sep_3D_Speed.png', bbox_inches="tight")
plt.show()

In [None]:
# 3D surface of density over link (x-axis) and minutes since 8:30 (y-axis)
from mpl_toolkits.mplot3d import Axes3D
plt.rcParams.update({'font.size': 14})
fig = plt.figure(figsize=(10,8))
ax = plt.axes(projection='3d')
# Use the precomputed 'Density' column (Occupancy*4000/700) rather than
# recomputing it from 'Occupancy', which the data-prep cell removes from v.
surf = ax.plot_trisurf(v['ID'], v['Minutes'], v['Density'], cmap='jet')
ax.set_zlim(0, 250)
ax.set_yticks(np.arange(0,61,10),
             ['8:30', '8:40', '8:50', '9:00', '9:10', '9:20', '9:30'])
ax.set_xticks(np.arange(1,15,1),
              ['1', '2', '3', '4', '5', '6', '7',
               '8', '9', '10', '11', '12', '13', '14'])
ax.set_ylabel('\nTime (HH:MM)', fontsize=16)
ax.set_xlabel('\nLink', fontsize=16, linespacing=1.5)
ax.set_zlabel('\nDensity (veh/km)', fontsize=16)
ax.invert_xaxis()

# Rescale the projection: stretch x by 1.2 and shrink y to 0.8.
ax.get_proj = lambda: np.dot(Axes3D.get_proj(ax),
                             np.diag([1.2, 0.8, 1, 1]))

ax.view_init(30, 60)
fig.colorbar(surf, shrink=.45, aspect=10, anchor=(-0.1,0.55))
# Fix the colour scale after the colorbar exists.
surf.set_clim(0, 240)
# savefig does not create missing directories; make sure the target exists.
os.makedirs('fig/3Sep', exist_ok=True)
plt.savefig('fig/3Sep/830-930_3Sep_3D_Density.png', bbox_inches="tight")
plt.show()

#### Heatmap

In [None]:
# Volume heatmap (8:30-9:30): links on the y-axis, minutes on the x-axis.
minute_ticks = np.arange(0, 61, 10)
minute_labels = ['8:30', '8:40', '8:50', '9:00', '9:10', '9:20', '9:30']
# Shift by 0.5 so each link label sits in the middle of its row.
row_ticks = np.arange(1, 15, 1) - 0.5
row_labels = ['1', '2', '3', '4', '5', '6', '7',
              '8', '9', '10', '11', '12', '13', '14']

fig, ax = plt.subplots()
# Colour scale runs from 0 to the largest volume observed in the window.
hmap = ax.pcolormesh(piv.T, cmap='jet', vmin=0, vmax=v['Volume'].max())
ax.set_yticks(row_ticks, row_labels)
ax.set_xticks(minute_ticks, minute_labels)
ax.set_ylabel('Link', fontsize=14)
ax.set_xlabel('Time (HH:MM)', fontsize=14)
plt.colorbar(hmap)
#plt.savefig('fig/3Sep/830-930_3Sep_Heatmap_Volume.png', bbox_inches="tight")
plt.show()

In [None]:
# Speed heatmap (8:30-9:30): links on the y-axis, minutes on the x-axis.
minute_ticks = np.arange(0, 61, 10)
minute_labels = ['8:30', '8:40', '8:50', '9:00', '9:10', '9:20', '9:30']
# Shift by 0.5 so each link label sits in the middle of its row.
row_ticks = np.arange(1, 15, 1) - 0.5
row_labels = ['1', '2', '3', '4', '5', '6', '7',
              '8', '9', '10', '11', '12', '13', '14']

fig, ax = plt.subplots()
# Fixed 0-100 colour scale.
hmap = ax.pcolormesh(pivs.T, cmap='jet', vmin=0, vmax=100)
ax.set_yticks(row_ticks, row_labels)
ax.set_xticks(minute_ticks, minute_labels)
ax.set_ylabel('Link', fontsize=14)
ax.set_xlabel('Time (HH:MM)', fontsize=14)
plt.colorbar(hmap)
#plt.savefig('fig/3Sep/830-930_3Sep_Heatmap_Speed.png', bbox_inches="tight")
plt.show()

In [None]:
# Density heatmap (8:30-9:30): links on the y-axis, minutes on the x-axis.

fig, ax = plt.subplots()
# Fixed 0-258 colour scale
# (NOTE(review): 258 is presumably the maximum density of a reference period -- confirm).
hmap = ax.pcolormesh(pivd.T, cmap='jet', vmin=0, vmax=258)
ax.set_xticks(np.arange(0,61,10),
             ['8:30', '8:40', '8:50', '9:00', '9:10', '9:20', '9:30'])
# Shift by 0.5 so each link label sits in the middle of its row.
ax.set_yticks(np.arange(1,15,1)-0.5,
              ['1', '2', '3', '4', '5', '6', '7',
               '8', '9', '10', '11', '12', '13', '14'])
ax.set_xlabel('Time (HH:MM)', fontsize=14)
ax.set_ylabel('Link', fontsize=14)
plt.colorbar(hmap)
# savefig does not create missing directories; make sure the target exists.
os.makedirs('fig/3Sep', exist_ok=True)
plt.savefig('fig/3Sep/830-930_3Sep_Heatmap_Density.png', bbox_inches="tight")
plt.show()

### Prepare the data for deep learning algorithms

In [None]:
# Drop the Congestion column. errors='ignore' keeps the cell re-runnable:
# a second execution is a no-op instead of raising KeyError.
df.drop(columns='Congestion', inplace=True, errors='ignore')

In [None]:
# Normalise TrafficCondition:
#   * a missing value means no restriction was recorded -> 'All Lanes Open'
#   * comma-joined pairs of conditions are collapsed to a single label
condition_map = {
    np.nan: 'All Lanes Open',
    'All Lanes Open, Emergency Lane Blocked': 'Emergency Lane Blocked',
    'Lane Closures Unknown, Lane(s) Blocked': 'Lane(s) Blocked',
    'Lane(s) Blocked, All Lanes Open': 'Lane(s) Blocked',
    'All Lanes Open, Lane(s) Blocked': 'Lane(s) Blocked',
    'Emergency Lane Blocked, Emergency Lane Blocked': 'Emergency Lane Blocked',
    'Lane Closures Unknown, All Lanes Open': 'All Lanes Open',
    'Lane(s) Blocked, Lane(s) Blocked': 'Lane(s) Blocked'
}
df['TrafficCondition'] = df['TrafficCondition'].replace(condition_map)

df['TrafficCondition'].value_counts()

In [None]:
# Keep Break Down / Tow Away, Debris / Trees / Lost Loads and Road Crash as
# their own categories; every other single incident type becomes 'Miscellaneous'.
# (The original list named 'Special Event /\nPothole / Hazmat' twice; the
# duplicate is removed -- the replacement itself is unchanged.)
misc_types = [
    'Flooding /\nStorm',
    'Special Event /\nPothole / Hazmat',
    'Animal /\n Livestock',
    'Vehicle Fire',
]
# replace() matches whole cell values, so comma-joined combinations are not
# touched by the list above and are handled explicitly below.
df.Incident_Type = df.Incident_Type.replace(misc_types, 'Miscellaneous')

# Rewrite the combined values so that 'Miscellaneous' is always listed second.
df.Incident_Type = df.Incident_Type.replace({
    'Flooding /\nStorm, Road Crash': 'Road Crash, Miscellaneous',
    'Miscellaneous, Break Down /\n Tow Away': 'Break Down /\n Tow Away, Miscellaneous',
    'Flooding /\nStorm, Break Down /\n Tow Away': 'Break Down /\n Tow Away, Miscellaneous',
})
df.Incident_Type.value_counts()

In [None]:
# Number of incidents per row = number of ', ' separators + 1.
# Rows without any incident (NaN Incident_Type) get 0.
separator_count = df['Incident_Type'].str.count(', ')
df['Num_Incidents'] = (separator_count + 1).fillna(0)
df['Num_Incidents'].value_counts()

In [None]:
# Split the incident string into two columns at the first ', '.
# `n` must be passed by keyword: pandas >= 2.0 rejects it positionally.
incident_parts = df['Incident_Type'].str.split(', ', n=1, expand=True)
# Guarantee two columns even when no row holds more than one incident
# (otherwise expand=True yields a single column and the assignment fails).
incident_parts = incident_parts.reindex(columns=[0, 1])
df[['Incident_Type1','Incident_Type2']] = incident_parts

In [None]:
# Drop the combined Incident_Type column. errors='ignore' keeps the cell
# re-runnable: a second execution is a no-op instead of raising KeyError.
df.drop(columns='Incident_Type', inplace=True, errors='ignore')

In [None]:
# Map the long incident names to short codes. The trailing 1/2 records which
# slot (first or second incident) the value came from; missing -> None1/None2.
first_slot_codes = {
    np.nan:'None1',
    'Break Down /\n Tow Away':'BDTA1',
    'Debris /\n Trees /\n Lost Loads':'DTLL1',
    'Road Crash':'RC1',
    'Miscellaneous':'Misc1'
}
second_slot_codes = {
    np.nan:'None2',
    'Break Down /\n Tow Away':'BDTA2',
    'Debris /\n Trees /\n Lost Loads':'DTLL2',
    'Road Crash':'RC2',
    'Miscellaneous':'Misc2'
}

df['Incident_Type1'] = df['Incident_Type1'].replace(first_slot_codes)
df['Incident_Type2'] = df['Incident_Type2'].replace(second_slot_codes)

In [None]:
# Frequency of each code in the first incident slot
df['Incident_Type1'].value_counts()

In [None]:
# Frequency of each code in the second incident slot
df['Incident_Type2'].value_counts()

In [None]:
# Summary of columns, dtypes and non-null counts after the preparation steps
df.info()

#### Link 11

Consider

- Link-11's full data and Link-12's incidents (downstream) lagged by 2 minutes
- Incident_Type1, Incident_Type2
    + No Incidents
    + Break Down / Tow Away
    + Debris / Trees / Lost Loads
    + Road Crash
    + Miscellaneous: everything else

In [None]:
# Link 11 is the modelled link; Link 12 supplies the downstream incident flag.
# Building df11 in one chain avoids an in-place drop on a slice of df.
df11 = df[df.ID == 11].drop(columns='ID').set_index('DateTime')
# Explicit copy: a later cell adds/overwrites columns on df12.
df12 = df[df.ID == 12].copy()

df11.index = pd.to_datetime(df11.index)
# Index.is_monotonic was removed in pandas 2.0; is_monotonic_increasing is the
# equivalent check and exists in older versions as well.
if not df11.index.is_monotonic_increasing:
    df11 = df11.sort_index()

df11.info()

In [None]:
# One-hot encode the string columns of df11.
from sklearn.preprocessing import LabelBinarizer

cond_enc = LabelBinarizer()
inc1_enc = LabelBinarizer()

# One indicator column per class, named after the class label.
# NOTE(review): with exactly two classes LabelBinarizer returns a single
# column, which would not match `classes_` -- confirm each column has >= 3.
for encoder, column in ((cond_enc, 'TrafficCondition'),
                        (inc1_enc, 'Incident_Type1')):
    indicators = pd.DataFrame(encoder.fit_transform(df11[column]),
                              columns=encoder.classes_,
                              index=df11.index)
    df11 = df11.join(indicators)

# For the second incident slot only 'RC2' is kept, as a 0/1 flag.
df11['RC2'] = (df11.Incident_Type2 == 'RC2').astype(int)

In [None]:
# Downstream incident
import datetime

# Rebuild the Link-12 frame from df on every run. The original cell overwrote
# df12 with a two-column frame, so running it a second time failed because
# 'Incident_Type1' was no longer present.
df12 = df.loc[df.ID == 12, ['DateTime', 'Incident_Type1']].copy()

# 1 when Link 12 has any incident in the first slot, 0 otherwise.
df12['Downstream_Incident'] = (df12.Incident_Type1 != 'None1').astype(int)

# Shift the Link-12 timestamps back by 2 minutes before merging.
# NOTE(review): after this shift a Link-12 record stamped t is matched to the
# Link-11 row at t-2, i.e. each Link-11 row receives the downstream state from
# 2 minutes *later*. Confirm this is the intended direction of the "lag".
df12['DateTime'] = df12['DateTime'] - datetime.timedelta(minutes=2)
df12 = df12[['DateTime', 'Downstream_Incident']]

# Left merge keeps every Link-11 row; rows without a match get NaN.
df11ds = pd.merge(df11, df12, how='left', on=['DateTime'])

In [None]:
# Save the merged Link-11 dataset (the index is written as the first column)
df11ds.to_csv(path_or_buf='data/df11ds.csv')

In [None]:
# Incident records for links 11 and 12, saved for later use.
# NOTE(review): assumes df1 still holds the incident table here -- a later
# cell rebinds df1 to Link-1 traffic data.
inc11 = df1.loc[df1['ID'] == 11]
inc12 = df1.loc[df1['ID'] == 12]
for incidents, csv_path in ((inc11, 'data/clean/inc11_wide.csv'),
                            (inc12, 'data/clean/inc12_wide.csv')):
    incidents.to_csv(csv_path)

In [None]:
# Incident records for link 9, saved for later use.
inc9 = df1.loc[df1['ID'] == 9]
inc9.to_csv(path_or_buf='data/clean/inc9_wide.csv')

#### Link 1

Continue from 1.2 Traffic + Incident data

In [None]:
def _volume_by_quarter_hour(link_df, start, end):
    """Sum 'Volume' into 15-minute bins for the rows between start and end.

    link_df must have a sorted DatetimeIndex; start and end are inclusive
    timestamp strings. Returns a one-column frame indexed by the
    time-of-day at which each 15-minute bin starts.
    """
    window = link_df.loc[start:end, ['Volume']]
    # '15min' replaces the '15T' alias, which is deprecated in recent pandas.
    return window.groupby(window.index.floor('15min').time).sum()


# NOTE(review): df1 held the incident table earlier in the notebook; from here
# on it is rebound to the Link-1 traffic data, so incident cells that read df1
# cannot be re-run after this one. Consider a dedicated name.
df1 = df[df.ID == 1].drop(columns='ID').set_index('DateTime')
df1.index = pd.to_datetime(df1.index)
# Index.is_monotonic was removed in pandas 2.0; use is_monotonic_increasing.
if not df1.index.is_monotonic_increasing:
    df1 = df1.sort_index()

# Same 08:30-13:44 window on two days: 27 Aug and 3 Sep 2018.
df2 = _volume_by_quarter_hour(df1, '2018-08-27 08:30:00', '2018-08-27 13:44:00')
df3 = _volume_by_quarter_hour(df1, '2018-09-03 08:30:00', '2018-09-03 13:44:00')

In [None]:
# Save the 27 Aug Link-1 15-minute volumes
df2.to_csv(path_or_buf='out_data/27Aug_Link1_volume.csv')

In [None]:
# Save the 3 Sep Link-1 15-minute volumes
df3.to_csv(path_or_buf='out_data/3Sep_Link1_volume.csv')

## LAD

In [None]:
lad = pd.read_csv('data/clean/LAD.csv', index_col=0)