In [1]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import sklearn
# NOTE(review): this compares version *strings*, i.e. lexicographically rather
# than numerically, so a hypothetical "0.3" would also satisfy the check.
# Parse the version into integers if a strict minimum is needed.
assert sklearn.__version__ >= "0.20"

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error

# tensorflow
import tensorflow as tf
from tensorflow import keras

from keras import backend as K
from keras.models import Sequential, load_model
from keras.layers import Dense, LSTM, Dropout, Conv1D, RepeatVector, TimeDistributed
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.wrappers.scikit_learn import KerasRegressor

# Common imports
import os
import timeit
import numpy as np
import pandas as pd
import seaborn as sns
from math import sqrt
from datetime import date
import holidays
sns.set()
import warnings
warnings.filterwarnings("ignore")

# To plot pretty figures
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
mpl.rcParams.update(mpl.rcParamsDefault)
mpl.rcParams["font.family"] = "serif"
mpl.rcParams["font.sans-serif"] = "Verdana"

In [2]:
# Load the ramp workbook, skipping its first three rows and using the next
# row as the column header, then preview the first rows.
df = pd.read_excel("data/Smart Freeways Data 2018 - Ramps All Days.xlsx",
                   skiprows=3, header=0)
df.head()

Unnamed: 0,Row Labels,01 Jan 18,02 Jan 18,03 Jan 18,04 Jan 18,05 Jan 18,06 Jan 18,07 Jan 18,08 Jan 18,09 Jan 18,...,23 Oct 18,24 Oct 18,25 Oct 18,26 Oct 18,27 Oct 18,28 Oct 18,29 Oct 18,30 Oct 18,31 Oct 18,01 Nov 18
0,KwN Manning On (130) (H547@310),3986.0,4988.0,5496.0,5685.0,5931.0,5324.0,4364.0,6188.0,6741.0,...,8808.0,8926.0,9306.0,9222.0,7147.0,5691.0,8532.0,8878.0,9133.0,9100.0
1,0130KWN-OR,,,,,,,,,,...,,,,,,,,,,
2,0130KWN-OR1,3986.0,4988.0,5496.0,5685.0,5931.0,5324.0,4364.0,6188.0,6741.0,...,8808.0,8926.0,9306.0,9222.0,7147.0,5691.0,8532.0,8878.0,9133.0,9100.0
3,00:00 - 00:15,16.0,9.0,11.0,10.0,14.0,27.0,21.0,6.0,8.0,...,10.0,13.0,11.0,16.0,33.0,32.0,16.0,23.0,18.0,6.0
4,00:15 - 00:30,38.0,7.0,11.0,11.0,8.0,18.0,19.0,8.0,5.0,...,6.0,10.0,13.0,13.0,18.0,39.0,10.0,13.0,12.0,16.0


In [3]:
# Load the LAD CSV and convert its DateTime column to pandas timestamps so it
# can be compared against time windows later on.
df2 = pd.read_csv('data/clean/LAD.csv')
df2.DateTime = pd.to_datetime(df2.DateTime)
df2.head()

Unnamed: 0,ID,DateTime,Length,Volume,Speed,Occupancy
0,1,2018-01-01 00:00:00,960.0,7.0,96.0,1.0
1,1,2018-01-01 00:01:00,960.0,6.0,94.999998,1.0
2,1,2018-01-01 00:02:00,960.0,5.0,90.999999,1.0
3,1,2018-01-01 00:03:00,960.0,5.0,94.999997,1.0
4,1,2018-01-01 00:04:00,960.0,5.0,92.999999,1.0


## 27 Aug

In [None]:
# Keep the label column and the 27 Aug counts. `Time` starts out as a copy of
# the labels so the interval strings survive once the labels are overwritten
# below. Building the frame in one chain that ends in .copy() avoids assigning
# into a slice of `df`.
df1 = (
    df[['Row Labels', '27 Aug 18']]
    .assign(Time=lambda frame: frame['Row Labels'])
    [['Row Labels', 'Time', '27 Aug 18']]
    .copy()
)

# Interval labels look like "00:00 - 00:15" (seconds optional). Raw strings
# keep `\s` as a regex token instead of an invalid Python escape sequence;
# the resulting pattern text is unchanged.
time_range_pattern = (
    r'([01]?[0-9]|2[0-4]):[0-5][0-9](:[0-5][0-9])?'
    r'\s-\s'
    r'([01]?[0-9]|2[0-4]):[0-5][0-9](:[0-5][0-9])?'
)

# Blank out the interval labels, then forward-fill so that every interval row
# carries the nearest non-interval label found above it.
df1['Row Labels'] = (
    df1['Row Labels']
    .replace(time_range_pattern, np.nan, regex=True)
    .ffill(axis=0)
)

# Drop rows whose (forward-filled) label is a "KwN ..." heading or the grand total.
unwanted = (
    df1['Row Labels'].str.contains('KwN ', na=False)
    | df1['Row Labels'].str.contains('Grand Total', na=False)
)
df1 = df1[~unwanted]
df1['Row Labels'].unique()

In [None]:
# 27 Aug
OR_df = df1.copy()
OR_df['Time'] = OR_df['Time'].str[:5]  # first five characters, e.g. "08:30"

# Single row mask: drop labels containing 'XR', '210' or 'T', rows without a
# value for the day, and rows whose Time is not a clock time.
ramp_label = OR_df['Row Labels']
keep_row = (
    ~ramp_label.str.contains('XR', na=False)
    & ~ramp_label.str.contains('210', na=False)
    & ~ramp_label.str.contains('T', na=False)
    & OR_df['27 Aug 18'].notnull()
    & OR_df['Time'].str.contains(':', na=False)
)
OR_df = OR_df[keep_row]

# Wide -> long: the remaining date column becomes a DateTime/Volume pair.
OR_df = (
    OR_df
    .assign(Time=pd.to_datetime(OR_df['Time'], format='%H:%M'))
    .melt(id_vars=['Row Labels', 'Time'],
          var_name='DateTime', value_name='Volume')
)

# Combine the calendar date with the time of day; `pop` removes the helper column.
time_of_day = pd.to_timedelta(OR_df.pop('Time').dt.strftime('%H:%M:%S'))
OR_df['DateTime'] = pd.to_datetime(OR_df['DateTime']) + time_of_day

OR_df = (
    OR_df
    .sort_values(['Row Labels', 'DateTime'])
    .reset_index(drop=True)
)

In [None]:
# Analysis window: 08:30-09:30 on 27 Aug, both ends inclusive.
# (A wider 08:30-13:30 window was tried previously.)
in_window = OR_df['DateTime'].between('2018-08-27 8:30:00',
                                      '2018-08-27 09:30:00')
new = (
    OR_df[in_window]
    .pivot_table(values='Volume', index='DateTime', columns='Row Labels')
    .reset_index()
)

g = new.set_index('DateTime')

# Ramps 0084 and 0089 each have two L columns; add them into one column per ramp.
for combined, (first, second) in {
    '0084KWN-ORL': ('0084KWN-OR1L', '0084KWN-OR2L'),
    '0089KWN-ORL': ('0089KWN-OR1L', '0089KWN-OR2L'),
}.items():
    g[combined] = g[first] + g[second]

# Source column -> output column name, in output order.
ramp_to_corridor = {
    '0081KWN-OR1L': 'H500',
    '0084KWN-ORL': 'H549',
    '0089KWN-ORL': 'H617',
    '0130KWN-OR1': 'H547',
    '0150KWN-OR1': 'H553',
    '0160KWN-OR1L': 'H554',
    '0200KWN-OR1L': 'H622',
    '0702KWN-OR1L': 'H558',
}
g = g[list(ramp_to_corridor)]
# Assigning a plain list also clears the columns' name.
g.columns = list(ramp_to_corridor.values())
# g = g*4
g

In [None]:
# g.to_csv('out_data/27Aug_OR.csv', index=True)

In [None]:
# Line plot of every column of `g` against time of day.
plt.rcParams.update({'font.size': 14, 'lines.linewidth': 2})

fig, ax = plt.subplots(figsize=(8, 5))
p = sns.lineplot(data=g, ax=ax, linewidth=2)

# Four legend entries per column; no legend title.
legend_columns = round(len(g.columns) / 4)
legend = ax.legend(loc='upper right', ncol=legend_columns)
legend.set_title('')

ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
# ax.xaxis.set_major_locator(mdates.MinuteLocator(byminute=[30]))
ax.set_xlabel('Time (HH:MM)', fontsize=18)
ax.set_ylabel('Volume (veh/hr)', fontsize=20)
ax.grid()
#plt.savefig('fig/27Aug_Ramps_Volume', bbox_inches="tight")
plt.show()

In [None]:
# One timestamp per minute across the window, both ends inclusive.
# 'min' is the supported spelling of the minute frequency; the 'T' alias is
# deprecated in recent pandas releases.
dr = pd.date_range('2018-08-27 08:30:00', '2018-08-27 09:30:00', freq='min')
# Reindexing onto the 1-minute grid leaves NaN at timestamps absent from `g`;
# interpolate() fills them (linear by default).
# NOTE(review): values are interpolated, not rescaled to a per-minute rate.
# Confirm that is what the downstream files expect.
newg = g.reindex(dr).interpolate()

In [None]:
# Line plot of the 1-minute interpolated frame, saved under fig/.
plt.rcParams.update({'font.size': 14, 'lines.linewidth': 2})

fig, ax = plt.subplots(figsize=(8, 5))
p = sns.lineplot(data=newg, ax=ax, linewidth=2)

# Four legend entries per column; no legend title.
legend_columns = round(len(newg.columns) / 4)
legend = ax.legend(loc='upper right', ncol=legend_columns)
legend.set_title('')

ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
# ax.xaxis.set_major_locator(mdates.MinuteLocator(byminute=[30]))
ax.set_xlabel('Time (HH:MM)', fontsize=18)
ax.set_ylabel('Volume (veh/hr)', fontsize=20)

plt.savefig('fig/830-930_27Aug_Ramps_Volume', bbox_inches="tight")
plt.show()

### demand file

In [None]:
# Keep the rows with ID == 1 inside 08:30-09:30 on 27 Aug (inclusive).
# NOTE: this rebinds `df2` to the filtered subset, so later cells that need
# other dates or IDs must reload the CSV.
# `.copy()` gives an independent frame so the column assignments below do not
# write into a slice of the original.
window_start = pd.Timestamp('2018-08-27 08:30:00')
window_end = pd.Timestamp('2018-08-27 09:30:00')
df2 = df2[(df2.ID == 1) &
          (df2.DateTime >= window_start) &
          (df2.DateTime <= window_end)].copy()

df2['corridor_id'] = 'KWN'
df2['demand'] = df2.Volume.astype(int)
df2['volume'] = df2.Volume.astype(int)
df2['speed'] = df2.Speed
# Whole minutes elapsed since the window start. Floor-dividing by a one-minute
# Timedelta works on every pandas version, whereas
# `.astype('timedelta64[m]')` is rejected by pandas >= 2.0 (only s/ms/us/ns
# resolutions are supported there).
df2['time'] = ((df2.DateTime - window_start) // pd.Timedelta(minutes=1)).astype(int)

In [None]:
# Demand-file columns taken from the filtered `df2` frame:
# minute offset, corridor id and demand.
df3 = df2[['time', 'corridor_id', 'demand']]
df3

In [None]:
# One timestamp per minute across the window, both ends inclusive.
# 'min' is the supported spelling of the minute frequency; the 'T' alias is
# deprecated in recent pandas releases.
dr = pd.date_range('2018-08-27 08:30:00', '2018-08-27 09:30:00', freq='min')
# Put `g` on the 1-minute grid and fill the new timestamps by interpolation
# (linear by default).
newg = g.reindex(dr).interpolate()

In [None]:
# demand file
# Wide -> long: one row per (column of newg, timestamp) pair. The two
# unnamed index levels come out of reset_index() as level_0 / level_1.
# NOTE: this cell replaces `newg` with its long form, so it cannot be re-run
# without first rebuilding the wide frame.
newg = (
    newg.unstack()
    .reset_index(name='demand')
    .rename(columns={'level_0': 'corridor_id', 'level_1': 'dt'})
)
# Whole minutes since the window start. Floor division by a one-minute
# Timedelta replaces `.astype('timedelta64[m]')`, which pandas >= 2.0 rejects.
minutes_from_start = (newg['dt'] - pd.Timestamp('2018-08-27 08:30:00')) // pd.Timedelta(minutes=1)
newg['time'] = minutes_from_start.astype(int)
newg = newg[['time', 'corridor_id', 'demand']].copy()
# Interpolated values are fractional; round up to whole numbers.
newg['demand'] = np.ceil(newg['demand']).astype(int)
newg

In [None]:
# demand file
# pd.concat([df3, newg]).to_csv('out_data/CaseII_27Aug_demand.csv', index=False)

### supply file

In [None]:
# Rebuild the wide 1-minute frame from `g` for the supply file.
# Relies on `dr` defined in an earlier cell.
newg = g.reindex(dr).interpolate()

In [None]:
# supply file
import datetime as dt

# Wide -> long: one row per (column of newg, timestamp) pair.
newg = (
    newg.unstack()
    .reset_index(name='volume')
    .rename(columns={'level_0': 'corridor_id', 'level_1': 'dt'})
)
newg['volume'] = np.ceil(newg['volume']).astype(int)
newg['date'] = '27/08/2018'

# Label each row "HHMM_HHMM": its own minute followed by the next minute.
period_start = newg['dt'].dt.strftime('%H%M_')
period_end = (newg['dt'] + dt.timedelta(minutes=1)).dt.strftime('%H%M')
newg['time_period'] = period_start + period_end
# NOTE(review): the label 0930_0931 is rewritten to 0930_0830. Confirm this
# wrap-around is what the consumer of the file expects.
newg['time_period'] = newg['time_period'].replace({'0930_0931':'0930_0830'})

In [None]:
# Load the 1-minute supply file and count its rows per distinct link
# (one group per combination of the key columns below).
s = pd.read_csv('data1min/CaseII_supply27Aug1Min.csv')
link_keys = [
    'link_id_tmc', 'corridor_id', 'corridor_link_order',
    'from_node_id', 'to_node_id', 'geometry',
    'num_of_lanes', 'reference_speed',
]
s.groupby(link_keys).count()

In [None]:
# Write one row per distinct link to out_data/test.csv: the grouping keys
# plus the first count column.
link_keys = [
    'link_id_tmc', 'corridor_id', 'corridor_link_order',
    'from_node_id', 'to_node_id', 'geometry',
    'num_of_lanes', 'reference_speed',
]
rows_per_link = s.groupby(link_keys).count().iloc[:, 0]
rows_per_link.to_csv('out_data/test.csv')

In [None]:
# Read the per-link table back in; this rebinds `s`, so the grouping keys are
# now ordinary columns.
s = pd.read_csv('out_data/test.csv')
s.head()

In [None]:
from itertools import cycle, islice

# Distinct time-period labels, in order of first appearance in `newg`.
tp = list(newg.time_period.unique())

# Repeat every link row once per time period. The repeat count is taken from
# `tp` rather than a hard-coded 61, so the two can never disagree, and
# repeating via the index (instead of `s.values`) keeps each column's dtype.
news = s.loc[s.index.repeat(len(tp))].reset_index(drop=True)

# Cycle through the labels so each link's block of rows gets the full sequence.
it = cycle(tp)
news['time_period'] = list(islice(it, len(news)))

news['date'] = '27/08/2018'
news['travel_time'] = np.nan
news['reference_speed'] = 60
news['queue'] = np.nan
news['notes'] = np.nan

In [None]:
# Keep only the columns used for the supply merge, then display the frame.
newg = newg[['corridor_id', 'date', 'time_period', 'volume']]; newg

In [None]:
# Supply rows built from `df2`: attach the time-period labels positionally
# (so `tp` must have exactly one entry per row) and the date string.
df3 = (
    df2[['time', 'corridor_id', 'volume', 'speed']]
    .reset_index()
    .assign(time_period=tp, date='27/08/2018')
    [['corridor_id', 'date', 'time_period', 'volume', 'speed']]
)
df3

In [None]:
# Stack the `newg` rows and the `df3` rows; `speed` exists only in `df3`, so
# it is NaN for the rows that come from `newg`.
dat = pd.concat([newg, df3]); dat

In [None]:
# Left join: keep every row of `news` and attach the matching `dat` columns
# by corridor, date and time period (unmatched rows get NaN).
odf = news.merge(dat, on=['corridor_id', 'date', 'time_period'], how='left')

In [None]:
# Add an empty density column and put the columns into the output order.
supply_columns = [
    'link_id_tmc', 'corridor_id', 'corridor_link_order',
    'from_node_id', 'to_node_id', 'time_period', 'date',
    'geometry', 'volume', 'num_of_lanes', 'travel_time',
    'speed', 'reference_speed', 'density', 'queue', 'notes',
]
odf = odf.assign(density=np.nan)[supply_columns]
# NOTE(review): rows without a matched volume are filled with the constant 87;
# the origin of that value is not shown here.
odf = odf.assign(volume=odf['volume'].fillna(87))
odf.info()

In [None]:
# odf.to_csv('out_data/CaseII_27Aug_Supply.csv', index=False)

## 3 Sep

In [None]:
# Label column plus the 03 Sep counts; `Time` duplicates the labels so the
# interval strings remain available after the labels are rewritten.
df1 = (
    df[['Row Labels', '03 Sep 18']]
    .assign(Time=lambda frame: frame['Row Labels'])
    [['Row Labels', 'Time', '03 Sep 18']]
)
df1.head()

In [None]:
# In Row Labels, replace interval strings ("HH:MM - HH:MM") with NaN, then
# forward-fill so every interval row carries the nearest non-interval label
# above it.

# Raw strings keep `\s` as a regex token instead of an invalid Python escape
# sequence; the resulting pattern text is unchanged.
time_range_pattern = (
    r'([01]?[0-9]|2[0-4]):[0-5][0-9](:[0-5][0-9])?'
    r'\s-\s'
    r'([01]?[0-9]|2[0-4]):[0-5][0-9](:[0-5][0-9])?'
)
df1 = df1.assign(**{
    'Row Labels': df1['Row Labels']
    .replace(time_range_pattern, np.nan, regex=True)
    .ffill(axis=0)
})

# Drop rows whose (forward-filled) label is a "KwN ..." heading or the grand total.
unwanted = (
    df1['Row Labels'].str.contains('KwN ', na=False)
    | df1['Row Labels'].str.contains('Grand Total', na=False)
)
df1 = df1[~unwanted]
df1['Row Labels'].unique()

In [None]:
# 3 Sep
OR_df = df1.copy()
OR_df['Time'] = OR_df['Time'].str[:5]  # first five characters, e.g. "08:30"

# Single row mask: drop labels containing 'XR', '210' or 'T', rows without a
# value for the day, and rows whose Time is not a clock time.
ramp_label = OR_df['Row Labels']
keep_row = (
    ~ramp_label.str.contains('XR', na=False)
    & ~ramp_label.str.contains('210', na=False)
    & ~ramp_label.str.contains('T', na=False)
    & OR_df['03 Sep 18'].notnull()
    & OR_df['Time'].str.contains(':', na=False)
)
OR_df = OR_df[keep_row]

In [None]:
# Sanity check: number of remaining rows per label, in label order.
OR_df['Row Labels'].value_counts().sort_index()

In [None]:
# Sanity check: number of remaining rows per time string, in time order.
OR_df['Time'].value_counts().sort_index()

In [None]:
# Wide -> long: the remaining date column becomes a DateTime/Volume pair.
OR_df = (
    OR_df
    .assign(Time=pd.to_datetime(OR_df['Time'], format='%H:%M'))
    .melt(id_vars=['Row Labels', 'Time'],
          var_name='DateTime', value_name='Volume')
)

# Combine the calendar date with the time of day; `pop` removes the helper column.
time_of_day = pd.to_timedelta(OR_df.pop('Time').dt.strftime('%H:%M:%S'))
OR_df['DateTime'] = pd.to_datetime(OR_df['DateTime']) + time_of_day

OR_df = (
    OR_df
    .sort_values(['Row Labels', 'DateTime'])
    .reset_index(drop=True)
)

In [None]:
# Analysis window: 08:30-09:30 on 3 Sep, both ends inclusive.
# (A wider 08:30-13:30 window was tried previously.)
in_window = OR_df['DateTime'].between('2018-09-03 8:30:00',
                                      '2018-09-03 09:30:00')
new = (
    OR_df[in_window]
    .pivot_table(values='Volume', index='DateTime', columns='Row Labels')
    .reset_index()
)

g = new.set_index('DateTime')

# Ramps 0084 and 0089 each have two L columns; add them into one column per ramp.
for combined, (first, second) in {
    '0084KWN-ORL': ('0084KWN-OR1L', '0084KWN-OR2L'),
    '0089KWN-ORL': ('0089KWN-OR1L', '0089KWN-OR2L'),
}.items():
    g[combined] = g[first] + g[second]

# Source column -> output column name, in output order.
ramp_to_corridor = {
    '0081KWN-OR1L': 'H500',
    '0084KWN-ORL': 'H549',
    '0089KWN-ORL': 'H617',
    '0130KWN-OR1': 'H547',
    '0150KWN-OR1': 'H553',
    '0160KWN-OR1L': 'H554',
    '0200KWN-OR1L': 'H622',
    '0702KWN-OR1L': 'H558',
}
g = g[list(ramp_to_corridor)]
# Assigning a plain list also clears the columns' name.
g.columns = list(ramp_to_corridor.values())
# g = g*4
g

In [None]:
# Line plot of every column of `g` against time of day, saved under fig/3Sep/.
plt.rcParams.update({'font.size': 14, 'lines.linewidth': 2})

fig, ax = plt.subplots(figsize=(8, 5))
p = sns.lineplot(data=g, ax=ax, linewidth=2)

# Four legend entries per column; no legend title.
legend_columns = round(len(g.columns) / 4)
legend = ax.legend(loc='upper right', ncol=legend_columns)
legend.set_title('')

ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
# ax.xaxis.set_major_locator(mdates.MinuteLocator(byminute=[30]))
ax.set_xlabel('Time (HH:MM)', fontsize=18)
ax.set_ylabel('Volume (veh/hr)', fontsize=20)

plt.savefig('fig/3Sep/830-930_3Sep_Ramps_Volume', bbox_inches="tight")
plt.show()

In [None]:
# g.to_csv('out_data/3Sep_OR.csv', index=True)

### demand file

In [None]:
# Reload the LAD data before filtering. The 27 Aug section rebinds `df2` to
# its own one-hour window, so filtering that frame for 3 Sep would return
# nothing when the notebook is run top to bottom.
lad = pd.read_csv('data/clean/LAD.csv')
lad['DateTime'] = pd.to_datetime(lad['DateTime'])

# Keep the rows with ID == 1 inside 08:30-09:30 on 3 Sep (inclusive).
# `.copy()` gives an independent frame for the column assignments below.
window_start = pd.Timestamp('2018-09-03 08:30:00')
window_end = pd.Timestamp('2018-09-03 09:30:00')
df2 = lad[(lad.ID == 1) &
          (lad.DateTime >= window_start) &
          (lad.DateTime <= window_end)].copy()

df2['corridor_id'] = 'KWN'
df2['demand'] = df2.Volume.astype(int)
df2['volume'] = df2.Volume.astype(int)
df2['speed'] = df2.Speed
# Whole minutes elapsed since the window start. Floor-dividing by a one-minute
# Timedelta works on every pandas version, whereas
# `.astype('timedelta64[m]')` is rejected by pandas >= 2.0 (only s/ms/us/ns
# resolutions are supported there).
df2['time'] = ((df2.DateTime - window_start) // pd.Timedelta(minutes=1)).astype(int)

In [None]:
# Demand-file columns taken from the filtered `df2` frame:
# minute offset, corridor id and demand.
df3 = df2[['time', 'corridor_id', 'demand']]
df3

In [None]:
# One timestamp per minute across the window, both ends inclusive.
# 'min' is the supported spelling of the minute frequency; the 'T' alias is
# deprecated in recent pandas releases.
dr = pd.date_range('2018-09-03 08:30:00', '2018-09-03 09:30:00', freq='min')
# Reindexing onto the 1-minute grid leaves NaN at timestamps absent from `g`;
# interpolate() fills them (linear by default).
# NOTE(review): values are interpolated, not rescaled to a per-minute rate.
# Confirm that is what the downstream files expect.
newg = g.reindex(dr).interpolate()

In [None]:
# demand file
# Wide -> long: one row per (column of newg, timestamp) pair. The two
# unnamed index levels come out of reset_index() as level_0 / level_1.
# NOTE: this cell replaces `newg` with its long form, so it cannot be re-run
# without first rebuilding the wide frame.
newg = (
    newg.unstack()
    .reset_index(name='demand')
    .rename(columns={'level_0': 'corridor_id', 'level_1': 'dt'})
)
# Whole minutes since the window start. Floor division by a one-minute
# Timedelta replaces `.astype('timedelta64[m]')`, which pandas >= 2.0 rejects.
minutes_from_start = (newg['dt'] - pd.Timestamp('2018-09-03 08:30:00')) // pd.Timedelta(minutes=1)
newg['time'] = minutes_from_start.astype(int)
newg = newg[['time', 'corridor_id', 'demand']].copy()
# Interpolated values are fractional; round up to whole numbers.
newg['demand'] = np.ceil(newg['demand']).astype(int)
newg

In [None]:
# demand file
# Stack the `df3` rows on top of the `newg` rows and write them out without
# the index column.
pd.concat([df3, newg]).to_csv('out_data/CaseII_3Sep_demand.csv', index=False)

### supply file

In [None]:
# Rebuild the wide 1-minute frame from `g` for the supply file.
# Relies on `dr` defined in an earlier cell.
newg = g.reindex(dr).interpolate()

In [None]:
# supply file
import datetime as dt

# Wide -> long: one row per (column of newg, timestamp) pair.
newg = (
    newg.unstack()
    .reset_index(name='volume')
    .rename(columns={'level_0': 'corridor_id', 'level_1': 'dt'})
)
newg['volume'] = np.ceil(newg['volume']).astype(int)
newg['date'] = '03/09/2018'

# Label each row "HHMM_HHMM": its own minute followed by the next minute.
period_start = newg['dt'].dt.strftime('%H%M_')
period_end = (newg['dt'] + dt.timedelta(minutes=1)).dt.strftime('%H%M')
newg['time_period'] = period_start + period_end
# NOTE(review): the label 0930_0931 is rewritten to 0930_0830. Confirm this
# wrap-around is what the consumer of the file expects.
newg['time_period'] = newg['time_period'].replace({'0930_0931':'0930_0830'})

In [None]:
# Take the time-period labels from the current `newg` rather than relying on
# a `tp` list left over from the 27 Aug section.
tp = list(newg.time_period.unique())

# Supply rows built from `df2`: labels are attached positionally (so `tp`
# must have exactly one entry per row), together with the date string.
df3 = (
    df2[['time', 'corridor_id', 'volume', 'speed']]
    .reset_index()
    .assign(time_period=tp, date='03/09/2018')
    [['corridor_id', 'date', 'time_period', 'volume', 'speed']]
)
df3

In [None]:
# Read the per-link table (out_data/test.csv) and preview it.
s = pd.read_csv('out_data/test.csv')
s.head()

In [None]:
from itertools import cycle, islice

# Distinct time-period labels, in order of first appearance in `newg`.
tp = list(newg.time_period.unique())

# Repeat every link row once per time period. The repeat count is taken from
# `tp` rather than a hard-coded 61, so the two can never disagree, and
# repeating via the index (instead of `s.values`) keeps each column's dtype.
news = s.loc[s.index.repeat(len(tp))].reset_index(drop=True)

# Cycle through the labels so each link's block of rows gets the full sequence.
it = cycle(tp)
news['time_period'] = list(islice(it, len(news)))

news['date'] = '03/09/2018'
news['travel_time'] = np.nan
news['reference_speed'] = 60
news['queue'] = np.nan
news['notes'] = np.nan

In [None]:
# Keep only the columns used for the supply merge, then display the frame.
newg = newg[['corridor_id', 'date', 'time_period', 'volume']]; newg

In [None]:
# Supply rows built from `df2`: attach the time-period labels positionally
# (so `tp` must have exactly one entry per row) and the date string.
df3 = (
    df2[['time', 'corridor_id', 'volume', 'speed']]
    .reset_index()
    .assign(time_period=tp, date='03/09/2018')
    [['corridor_id', 'date', 'time_period', 'volume', 'speed']]
)
df3

In [None]:
# Stack the `newg` rows and the `df3` rows; `speed` exists only in `df3`, so
# it is NaN for the rows that come from `newg`.
dat = pd.concat([newg, df3]); dat

In [None]:
# Left join: keep every row of `news` and attach the matching `dat` columns
# by corridor, date and time period (unmatched rows get NaN).
odf = news.merge(dat, on=['corridor_id', 'date', 'time_period'], how='left')

In [None]:
# Inspect column dtypes and non-null counts of the merged frame.
odf.info()

In [None]:
# Add an empty density column and put the columns into the output order.
supply_columns = [
    'link_id_tmc', 'corridor_id', 'corridor_link_order',
    'from_node_id', 'to_node_id', 'time_period', 'date',
    'geometry', 'volume', 'num_of_lanes', 'travel_time',
    'speed', 'reference_speed', 'density', 'queue', 'notes',
]
odf = odf.assign(density=np.nan)[supply_columns]
# NOTE(review): rows without a matched volume are filled with the constant 87;
# the origin of that value is not shown here.
odf = odf.assign(volume=odf['volume'].fillna(87))
odf.info()

In [None]:
# odf.to_csv('out_data/CaseII_3Sep_Supply.csv', index=False)

## Link7+8+Ramp554 (0160)

In [None]:
# Reload the full LAD CSV (earlier cells rebound `df2` to filtered subsets)
# and convert DateTime to pandas timestamps.
df2 = pd.read_csv('data/clean/LAD.csv')
df2.DateTime = pd.to_datetime(df2.DateTime)
df2.head()

In [None]:
# Keep only the rows whose ID is 7 or 8 and show how many rows each has.
df2 = df2[df2.ID.isin([7, 8])]
df2.ID.value_counts()

In [None]:
# Save the ID 7 / ID 8 subset without the index column.
df2.to_csv('data/clean/L7+L8.csv', index=False)

In [None]:
# Load the 0160KWN-ORT CSV; this rebinds `df1`, which earlier sections used
# for the workbook extracts.
df1 = pd.read_csv('data/clean/0160KWN-ORT.csv')
df1.head()

In [None]:
# Inspect column dtypes and non-null counts of the freshly loaded frame.
df1.info()

In [None]:
# Parse the first five characters of 'Start Time' ("HH:MM") as a clock time.
start_of_interval = pd.to_datetime(df1['Start Time'].str[:5], format='%H:%M')

# Wide -> long: every other column becomes a DateTime/Volume pair.
df1 = (
    df1
    .assign(**{'Start Time': start_of_interval})
    .melt(id_vars=['Start Time'],
          var_name='DateTime', value_name='Volume')
)

# Combine the calendar date with the time of day; `pop` removes the helper column.
time_of_day = pd.to_timedelta(df1.pop('Start Time').dt.strftime('%H:%M:%S'))
df1['DateTime'] = pd.to_datetime(df1['DateTime']) + time_of_day

In [None]:
# Chronological order with a fresh 0..n-1 index.
df1 = df1.sort_values('DateTime').reset_index(drop=True)
df1.head()

In [None]:
# Inspect column dtypes and non-null counts after reshaping.
df1.info()

In [None]:
# Save the long-format series without the index column.
df1.to_csv('data/clean/Ramp554.csv', index=False)