In [1]:
import os

import pandas as pd

import forecaster_tool.forecast_tool as ft

  from pandas import Int64Index as NumericIndex


In [2]:
# Directory that holds the NYC subway CSV. The original absolute path is kept
# as the default so the notebook still runs unchanged on the author's machine;
# set the NYC_DATA_DIR environment variable to point elsewhere.
file_path = os.environ.get('NYC_DATA_DIR', 'C:/Users/11941/Downloads/DS5500/NYCData/')
tool = ft.ForecasterTool()
# os.path.join tolerates a directory given with or without a trailing separator.
data = tool.load_data(os.path.join(file_path, 'NYC_subway_traffic_2017-2021.csv'),
                      unique_id=1, target='Entries', features=[], freq='4h')
data.head()

Unnamed: 0,Entries
2017-02-04 04:00:00,403.0
2017-02-04 08:00:00,1284.0
2017-02-04 12:00:00,2316.0
2017-02-04 16:00:00,2383.0
2017-02-04 20:00:00,1526.0


In [3]:
# Training window: 2017-03-05 to 2018-03-05; test window: 2018-03-06 to 2018-04-06.
# The trailing semicolon suppresses the cell's output.
tool.train_test_split(
    train_start_date='2017-03-05',
    train_end_date='2018-03-05',
    test_start_date='2018-03-06',
    test_end_date='2018-04-06',
);

In [4]:
from IPython.utils import io
from timeit import default_timer as timer

def run_experiments(models, runs, look_back_steps=24, forecast_steps=6, horizon_step=2):
    """Fit and score every model ``runs`` times and tabulate the results.

    Relies on the notebook-level ``tool`` object. For each model and each run
    it calls ``tool.create_forecaster``, times ``tool.fit()`` (with the output
    produced during fitting captured so it does not flood the cell) and reads
    the metrics from ``tool.calculate_performance``.

    Parameters
    ----------
    models : iterable of str
        Model names passed to ``tool.create_forecaster``.
    runs : int
        Number of fit/evaluate repetitions per model.
    look_back_steps : int, default 24
        Passed to ``create_forecaster`` for every model except ``'Arima'``.
    forecast_steps : int, default 6
        Passed to ``create_forecaster`` for every model.
    horizon_step : int, default 2
        Passed to ``tool.calculate_performance``.

    Returns
    -------
    pandas.DataFrame
        One row per model. Columns are ``model``, then ``MAE_Run<i>``,
        ``RMSE_Run<i>``, ``MAPE_Run<i>`` and ``Time_Spent<i>`` for each
        1-based run ``i``, followed by ``MAE_mean``, ``RMSE_mean``,
        ``MAPE_mean`` and ``Time_mean`` (row-wise means over the runs).
        ``Time_Spent<i>`` is the elapsed time of ``tool.fit()`` as measured
        by ``timeit.default_timer``.
    """
    perf_summary = {'model': []}
    for model in models:
        perf_summary['model'].append(model)

        for run_no in range(1, runs + 1):
            # The forecaster is re-created before every fit.
            # NOTE(review): presumably this resets the model between runs;
            # confirm against forecaster_tool.
            if model == 'Arima':
                # 'Arima' is created without a look-back window.
                tool.create_forecaster(model, forecast_steps=forecast_steps)
            else:
                tool.create_forecaster(model, look_back_steps=look_back_steps,
                                       forecast_steps=forecast_steps)

            # Only the fit call is timed; anything it prints is swallowed.
            with io.capture_output():
                start = timer()
                forecasts, _ = tool.fit()
                end = timer()

            perf_df = tool.calculate_performance(forecasts, plot=False,
                                                 horizon_step=horizon_step)
            # The first three entries of the 'General' column are taken as
            # MAE, RMSE and MAPE, in that order (assumed layout; confirm
            # against calculate_performance).
            general = perf_df['General'].values.tolist()
            run_metrics = {
                'MAE_Run': general[0],
                'RMSE_Run': general[1],
                'MAPE_Run': general[2],
                'Time_Spent': end - start,
            }
            # Insertion order here fixes the column order of the result.
            for prefix, value in run_metrics.items():
                perf_summary.setdefault(prefix + str(run_no), []).append(value)
            print(model + ' Run ' + str(run_no) + ' finished')

    result = pd.DataFrame.from_dict(perf_summary)
    mean_columns = (
        ('MAE_Run', 'MAE_mean'),
        ('RMSE_Run', 'RMSE_mean'),
        ('MAPE_Run', 'MAPE_mean'),
        ('Time_Spent', 'Time_mean'),
    )
    for prefix, mean_col in mean_columns:
        run_cols = [prefix + str(run_no) for run_no in range(1, runs + 1)]
        result[mean_col] = result[run_cols].mean(axis=1)

    return result
            
         

In [7]:
# First batch of models to compare; each one is fitted and scored 5 times.
model_list = [
    'MLPModel',
    'NBeats',
    'TCNModel',
    'Transformer',
]
result_df = run_experiments(model_list, runs=5)

MLPModel Run 1 finished
MLPModel Run 2 finished
MLPModel Run 3 finished
MLPModel Run 4 finished
MLPModel Run 5 finished
NBeats Run 1 finished
NBeats Run 2 finished
NBeats Run 3 finished
NBeats Run 4 finished
NBeats Run 5 finished
TCNModel Run 1 finished
TCNModel Run 2 finished
TCNModel Run 3 finished
TCNModel Run 4 finished
TCNModel Run 5 finished
Transformer Run 1 finished
Transformer Run 2 finished
Transformer Run 3 finished
Transformer Run 4 finished
Transformer Run 5 finished


In [8]:
# Show the summary table that run_experiments returned for model_list.
result_df

Unnamed: 0,model,MAE_Run1,RMSE_Run1,MAPE_Run1,Time_Spent1,MAE_Run2,RMSE_Run2,MAPE_Run2,Time_Spent2,MAE_Run3,...,MAPE_Run4,Time_Spent4,MAE_Run5,RMSE_Run5,MAPE_Run5,Time_Spent5,MAE_mean,RMSE_mean,MAPE_mean,Time_mean
0,MLPModel,266.354232,435.918707,0.496088,11.599542,286.724138,463.166191,0.487859,18.893045,293.30094,...,0.501832,16.75187,288.978056,474.179763,0.517773,13.805729,283.081087,459.504389,0.512615,14.559934
1,NBeats,380.111808,628.286799,0.57876,43.655204,365.521421,595.738704,0.610543,28.572864,332.295716,...,0.500538,30.176675,339.835946,574.248983,0.509449,56.614065,350.645768,586.0676,0.549404,39.475755
2,TCNModel,314.567398,492.984618,0.574959,131.473113,291.408568,483.813053,0.510428,171.541693,311.402299,...,0.526739,244.732439,285.6907,480.503115,0.510909,189.734194,298.716405,485.488706,0.54103,178.662803
3,Transformer,269.54023,451.047811,0.478754,425.919783,307.974922,488.023572,0.466813,846.222737,253.891327,...,0.492524,476.579436,259.676071,447.072818,0.437923,623.422093,272.256635,451.737911,0.456879,577.201456


In [5]:
# Second batch of models to compare; each one is fitted and scored 5 times.
model_list_2 = [
    'LSTM',
    'CNN',
]
result_df_2 = run_experiments(model_list_2, runs=5)

LSTM Run 1 finished
LSTM Run 2 finished
LSTM Run 3 finished
LSTM Run 4 finished
LSTM Run 5 finished
CNN Run 1 finished
CNN Run 2 finished
CNN Run 3 finished
CNN Run 4 finished
CNN Run 5 finished


In [6]:
# Show the summary table that run_experiments returned for model_list_2.
result_df_2

Unnamed: 0,model,MAE_Run1,RMSE_Run1,MAPE_Run1,Time_Spent1,MAE_Run2,RMSE_Run2,MAPE_Run2,Time_Spent2,MAE_Run3,...,MAPE_Run4,Time_Spent4,MAE_Run5,RMSE_Run5,MAPE_Run5,Time_Spent5,MAE_mean,RMSE_mean,MAPE_mean,Time_mean
0,LSTM,281.804598,460.414539,0.46737,89.932637,307.587252,483.116408,0.615852,111.020562,289.501567,...,0.453435,78.422896,266.07837,455.413573,0.425883,112.937491,282.426541,458.83833,0.507028,86.265802
1,CNN,295.788924,479.278012,0.524799,15.932054,282.519331,450.514587,0.470593,11.20051,292.369906,...,0.455552,20.701035,284.704284,468.237285,0.512054,14.563722,288.312853,467.660218,0.491547,15.841455
