In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Read in Data


* The file 'Ridership Model Data.csv' must be in the project folder;
* Create `model_data` for training the model, with the non-relevant features dropped; and
* Create `features` (the model inputs, i.e. `model_data` without the 'Total Boardings' target) to be used for generating future feature values.



In [None]:
# Load the raw ridership table, then derive the modelling frame and its input columns.

df = pd.read_csv('Ridership Model Data.csv')

# Drop the columns that are not used by the model.
model_data = df.drop(columns=['ISOWeek','Month','Start of Week','Population Growth Rate',
                              'critical cases','BC Vaccination Rate'])

# Predictors only (target column removed), plus their summary statistics.
features = model_data.drop(columns=['Total Boardings'])
feat_desc = features.describe()

In [None]:
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

In [None]:
def get_preds(year,expansion,restaurant_bookings,gas_price,uni_season,emp,wfh,covid=False):
  '''
  Print a prediction, with a +/- 12% band, for one set of scenario values.

  The feature row is built from the arguments plus the 'mean' row of the
  notebook-level `feat_desc`, scaled with the notebook-level `scaler` and
  scored with the notebook-level `model`. All of those (and `features`) must
  already be defined when this function is called.

  Parameters
  ----------
  year : number
      Forecast year. Only the number of years after 2021 is used
      (0 when `year` is 2021 or earlier).
  expansion : number
      Added to the mean 'Revenue Hours' as `years * expansion / 52`
      (presumably annual hours spread over weekly rows - TODO confirm).
  restaurant_bookings, gas_price, uni_season : number
      Passed to the model unchanged.
  emp : number
      Fractional change per year applied linearly to the mean 'Employment'.
  wfh : number
      Fractional change per year applied linearly to the mean 'WFH'.
  covid : bool, default False
      If True the year feature is 2021 and hospitalizations is its mean;
      otherwise the year feature is 2019 and hospitalizations is 0.

  Returns
  -------
  None
      The result is printed, not returned.
  '''
  base_year = 2021
  weeks_per_year = 52
  error_band = .12

  # Years past the base year; never negative.
  years_ahead = max(year - base_year, 0)

  # The model is fed a fixed year (not `year` itself) together with the
  # matching hospitalization level.
  if covid:
    model_year = 2021
    hospitalizations = feat_desc['hospitalizations']['mean']
  else:
    model_year = 2019
    hospitalizations = 0

  revenue_hours = feat_desc['Revenue Hours']['mean'] + years_ahead * expansion / weeks_per_year
  employment = feat_desc['Employment']['mean'] * (1 + years_ahead * emp)
  # Separate name so the `wfh` argument (a rate) is not overwritten by a level.
  wfh_level = feat_desc['WFH']['mean'] * (1 + years_ahead * wfh)

  # Values are positional: this order has to match features.columns.
  row = np.array([model_year, revenue_hours, restaurant_bookings, gas_price,
                  uni_season, employment, wfh_level, hospitalizations]).reshape(1, -1)
  test_data = pd.DataFrame(row,columns=features.columns)
  scaled = scaler.transform(test_data)
  prediction = model.predict(scaled)[0][0]

  # abs() keeps the band positive even if the model returns a negative value.
  margin = abs(prediction * error_band)

  print('Prediction: ' + '{:.2f}'.format(round(prediction, 2)) + ' +/- ' + '{:.2f}'.format(round(margin, 2)))
  print('\n')

# Feature Scaling (no train/test split — the model is fit on all of the data)

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Separate the target from the predictors, then min-max scale the predictors for training.
y = model_data['Total Boardings']
X = model_data.drop(columns='Total Boardings')

# Keep the fitted scaler: the same transform is applied to new rows at prediction time.
scaler = MinMaxScaler()
X = scaler.fit_transform(X)

# Create and Train the Model

* Hyperparameters have been selected based on previous testing of the model.

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import Adam

In [None]:
# Fully-connected regression network with four Dense layers (8 -> 5 -> 3 -> 1).
# The three non-output layers use ReLU; the single output unit has no activation.
# Dropout is left out because it hindered performance in the model testing.
# Adam optimizer with an mse loss, for regression on a single output variable.
model = Sequential([
    Dense(8,activation='relu'),
    Dense(5,activation='relu'),
    Dense(3,activation='relu'),
    Dense(1),
])

model.compile(loss='mse', optimizer='Adam')

In [None]:
# Train on all of the data (nothing held out); 200 epochs was chosen from
# earlier test runs that used validation data.
model.fit(
    x=X,
    y=y,
    batch_size=1,
    epochs=200,
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<keras.callbacks.History at 0x7f8ea00f3450>

In [None]:
# Interactive scenario explorer: each widget below supplies the get_preds
# argument of the same name, and get_preds is re-run when a control changes.
# `covid` is not listed here, so interact falls back to its False default
# (presumably rendered as a checkbox - TODO confirm).

# Let labels take their natural width; the default width cuts long descriptions off.
label_style = {'description_width': 'initial'}

# IntSlider needs integers, so convert the float statistics explicitly.
gas_stats = feat_desc['Gas Price (C/L)']
gas_mid = int(gas_stats['mean'])
gas_low = int(gas_stats['mean'] - 3 * gas_stats['std'])
gas_high = int(gas_stats['mean'] + 3 * gas_stats['std'])

interact(get_preds,year=[('2022',2022),('2023',2023),('2024',2024)],
         expansion=widgets.IntSlider(value=0,min=-20000,max=20000,step=5000,
                                     description='expansion hours',style=label_style),
         restaurant_bookings=widgets.FloatSlider(value=0,min=-1.0,max=1.0,step=.1,
                                                 description='restaurant bookings',style=label_style),
         gas_price=widgets.IntSlider(value=gas_mid,
                                     min=gas_low,
                                     max=gas_high,
                                     step=1, description='gas price',style=label_style),
         uni_season=widgets.Dropdown(options=[('In Session',1),('Not in Session',2),('Holiday',3)],
                                     description='university',style=label_style),
         emp=widgets.FloatSlider(value=0.0,
                               min=-.05,
                               max=.05,
                               step=.01, description='employment % change per year',style=label_style),
         wfh=widgets.FloatSlider(value=0.0,
                               min=-1.0,
                               max=1.0,
                               step=.1, description='wfh % change per year',style=label_style)
         );  # trailing ';' hides the returned-function repr in the cell output

interactive(children=(Dropdown(description='year', options=(('2022', 2022), ('2023', 2023), ('2024', 2024)), v…

<function __main__.get_preds>