In this notebook, two versions each of a feedforward neural network (MLP) and of an LSTM model are applied. 
The feedforward NN versions comprise a model which only uses one taxi district as input to predict the future demand (referred to as "SingleMLP") and a model which uses multiple taxi districts as input (referred to as "ComplexMLP"). The same concept applies to the LSTM models: The "SingleLSTM" only processes data of a particular taxi district while the "MultivarLSTM" processes multiple districts at the same time to make predictions for multiple areas. The classes of the models which contain all methods necessary to process the data and train the models are imported.

The idea of each class is to keep all required preprocessing steps and the training of the model in one place. Additionally, pre-trained models can be "loaded" into the class. 


## Import Packages

In [1]:
import pandas as pd
import numpy as np
 
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

import datetime
from dateutil.relativedelta import relativedelta

import sklearn.preprocessing
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder

import itertools

import os

import math

#from tqdm import tqdm

#import keras specific functions for storing and loading models
from keras.models import Model

from keras.models import model_from_json



#load custom deep Models (LSTM, MLP)
from custom_deepmodels import SingleLSTM

from multivar_lstm import MultivariateLSTM 

from complex_mlp import ComplexMLP

from single_mlp import SingleMLP



import statsmodels.api as sm

import scipy.stats as st




Using TensorFlow backend.


## Load & prepare data

In [2]:
# Input/output locations and the name of the preprocessed demand file.
TRAIN_PATH = '/media/...'
Store_PATH = '/media/...'
file_final = 'preprocessed_taxidemand.csv'

# Read the demand table; the first row of the CSV holds the column names.
df_m = pd.read_csv(TRAIN_PATH + file_final, header=0)

# Parse "date" as UTC, render it as a plain 'YYYY-MM-DD HH:MM:SS' string and
# parse it again, which leaves a timezone-naive datetime column.
df_m['date'] = pd.to_datetime(
    pd.to_datetime(df_m['date'], utc=True).dt.strftime('%Y-%m-%d %H:%M:%S')
)

# "date" deliberately stays a regular column here: get_nlargest_areas() still
# needs it as a column and sets it as the index itself.
print(df_m.head())

                 date  1  2  3    4  5  6   7  8  9  ...  254  255  256  257  \
0 2009-01-01 05:00:00  0  0  0   91  0  0  30  0  0  ...    0   50   39    3   
1 2009-01-01 06:00:00  1  0  0  105  0  0  62  0  0  ...    0   77   67    5   
2 2009-01-01 07:00:00  0  0  0   96  0  0  79  0  0  ...    0   90   83    4   
3 2009-01-01 08:00:00  0  0  0   91  0  0  84  0  0  ...    0   54   77    3   
4 2009-01-01 09:00:00  2  0  0   82  0  0  85  0  1  ...    0   66   54    4   

   258  259  260  261  262  263  
0    1    0    3   52  127  326  
1    0    0   15   65  166  476  
2    0    0   19   39  125  460  
3    1    0   19   54   79  313  
4    0    0   13   24   47  224  

[5 rows x 264 columns]


In [3]:
# Filter the areas (columns) with the highest total demand.

def get_nlargest_areas(nlargest, org_dataset=None):
    """Return the time series of the ``nlargest`` areas with the highest total demand.

    Parameters
    ----------
    nlargest : int
        Number of areas (columns) to keep.
    org_dataset : pandas.DataFrame, optional
        Frame with a "date" column plus one column per area. When omitted,
        the notebook-level ``df_m`` is used. The lookup happens at call time,
        so a reloaded ``df_m`` is picked up without redefining this function.

    Returns
    -------
    pandas.DataFrame
        New frame indexed by "date" (shifted back by five hours), holding only
        the selected area columns ordered by descending total demand. The
        input frame is not modified.

    Raises
    ------
    KeyError
        If ``org_dataset`` has no "date" column.
    """
    # Resolve the default lazily; a default of `df_m` in the signature would be
    # frozen at definition time and go stale when the data cell is re-run.
    if org_dataset is None:
        org_dataset = df_m

    # Total demand per area over the whole period ("date" is not an area).
    demand_totals = org_dataset.drop(columns=["date"]).sum(axis=0, numeric_only=True)

    # Labels of the busiest areas, highest total first.
    busiest_areas = demand_totals.nlargest(nlargest).index.tolist()

    # Selecting with a list of labels yields a new frame, so the caller's data
    # stays untouched without an explicit deep copy.
    ts_largest = org_dataset[busiest_areas + ["date"]].set_index("date")

    # Shift the DatetimeIndex from UTC to local NYC time.
    # NOTE(review): a fixed -5h offset matches EST only and does not follow
    # daylight saving time -- confirm this is intended.
    ts_largest.index = ts_largest.index - pd.Timedelta(hours=5)

    return ts_largest


In [4]:
# Build the demand frames for the 20 and the 10 busiest areas (in that order).
ts_20largest, ts_10largest = map(get_nlargest_areas, (20, 10))

# Sanity check: (number of time steps, number of areas) of each frame.
print('20 largest areas: ', ts_20largest.shape)
print('10 largest areas: ', ts_10largest.shape)


20 largest areas:  (83231, 20)
10 largest areas:  (83231, 10)


#### function to load models

In [5]:
def load_pretrained_model_from_disk(model_type):
    
    '''
    Returns pre-trained model from disk

    The architecture is read from a JSON file and rebuilt with
    ``model_from_json``; afterwards the stored weights are loaded into it.

    Parameters:
        model_type: key of the model to load. Stored files are currently
            registered for 'ComplexMLP' and 'MultivarLSTM' only.

    Returns:
        The model rebuilt from the JSON architecture with its weights loaded.

    Raises:
        ValueError: if ``model_type`` is unknown, or if it is known but no
            stored files are registered for it ('SingleMLP', 'SingleLSTM').
    '''

    model_PATH = '/media/...'

    #complex MLP:
    model_architecture_complex_MLP_PATH = '/media/...'
    complex_MLP_model_file = 'complex_MLP_early_stopping_W168_20areas__y2012.json'
    complex_MLP_weights = 'complex_MLP_early_stopping_W168_20areas__y2012_weights.h5'

    #multivar LSTM without additional features:
    model_architecture_multivar_LSTM_PATH = '/media/...'
    multivar_LSTM_file = 'multivar_LSTM_W168_20areas__y2012.json'
    multivar_LSTM_weights = 'multivar_LSTM_W168_20areas__y2012_weights.h5'

    #per model type: (architecture json file, directory of the weights, weights file)
    #an empty tuple marks a model type without stored files
    instances_dict = {'SingleMLP': (),
                  'SingleLSTM': (),
                  'ComplexMLP': (complex_MLP_model_file, model_architecture_complex_MLP_PATH, complex_MLP_weights),
                  'MultivarLSTM': (multivar_LSTM_file, model_architecture_multivar_LSTM_PATH, multivar_LSTM_weights)
                 }

    #fail early with a readable message instead of a bare KeyError / IndexError:
    if model_type not in instances_dict:
        raise ValueError("Unknown model_type %r; expected one of %s"
                         % (model_type, sorted(instances_dict)))

    model_files = instances_dict[model_type]
    if len(model_files) != 3:
        raise ValueError("No pre-trained model files are registered for model_type %r"
                         % (model_type,))

    architecture_file, weights_dir, weights_file = model_files

    #load architecture of the requested model; 'with' closes the file even if reading fails
    #NOTE(review): the json is read from model_PATH while the weights are read from the
    #'model_architecture_*_PATH' directory -- confirm that both locations are intended.
    with open(model_PATH + architecture_file, 'r') as json_file:
        loaded_model_json = json_file.read()
    prediction_model = model_from_json(loaded_model_json)

    #load weights of best model:
    prediction_model.load_weights(weights_dir + weights_file)

    return prediction_model



## Train new models 

### SingleLSTM

In [8]:
# Fresh SingleLSTM wrapper.
snglelSTM = SingleLSTM()

# Years delimiting the training, validation and test periods.
start_train_year, last_train_set_year = '2009', '2010'
validation_set_year, test_set_year = '2011', '2012'

# Data of a single area: restrict to 2009-2014, then take the first column.
single_ts_series = ts_20largest.loc['2009':'2014'].iloc[:, 0]

# Build the full model for that single area. Per the original author's note,
# this returns the predictions for the validation year and the test year,
# with the model trained on the data of 2009 & 2010.
results_i = snglelSTM.create_full_pred_model(
    single_ts_series,
    start_train_year,
    last_train_set_year,
    validation_set_year,
    test_set_year,
    'TEST_singleLSTM',
    verbose=1,
)

years selected:
start_validation_set_year  2011
start_test_set_year  2012
end_validation_set_year  None
end_test_set_year  None
##
ts diff shape:  (52415, 169)
Train/Test Split...
generate data..
start_validation_set_year:  2011
end_validation_set_year:  2011
start_test_set_year:  2012
end_test_set_year:  2012
Data is scaled...
MinMax-Scaling used...
Reshape data for LSTM model...
X_train shape before modeling:  (17351, 168, 1)
X_valid shape before modeling:  (8760, 168, 1)
X_test shape before modeling:  (8784, 168, 1)
y_train shape before modeling:  (17351,)
y_valid shape before modeling:  (8760,)
y_test shape before modeling:  (8784,)
scaler type:  MinMaxScaler(copy=True, feature_range=(-1, 1))
create stacked LSTM 2 layer non-stateful model:
#Dropout applied
#Clipping Norm applied
Regular 2H-LSTM Model is created...
Train on 17351 samples, validate on 8760 samples
Epoch 1/150
#Current LearningRate:  0.001

Epoch 00001: val_loss improved from inf to 0.04276, saving model to /media/vin

#Current LearningRate:  0.001

Epoch 00019: val_loss improved from 0.01791 to 0.01746, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models/TEST_Runtime_bestmodel.h5
Epoch 20/150
#Current LearningRate:  0.001

Epoch 00020: val_loss improved from 0.01746 to 0.01623, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models/TEST_Runtime_bestmodel.h5
Epoch 21/150
#Current LearningRate:  0.001

Epoch 00021: val_loss improved from 0.01623 to 0.01548, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models/TEST_Runtime_bestmodel.h5
Epoch 22/150
#Current LearningRate:  0.001

Epoch 00022: val_loss did not improve from 0.01548
Epoch 23/150
#Current LearningRate:  0.001

Epoch 00023: val_loss improved from 0.01548 to 0.01445, saving model to /

#Current LearningRate:  0.001

Epoch 00044: val_loss did not improve from 0.00923
Epoch 45/150
#Current LearningRate:  0.001

Epoch 00045: val_loss improved from 0.00923 to 0.00895, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models/TEST_Runtime_bestmodel.h5
Epoch 46/150
#Current LearningRate:  0.001

Epoch 00046: val_loss did not improve from 0.00895
Epoch 47/150
#Current LearningRate:  0.001

Epoch 00047: val_loss did not improve from 0.00895
Epoch 48/150
#Current LearningRate:  0.001

Epoch 00048: val_loss did not improve from 0.00895
Epoch 49/150
#Current LearningRate:  0.001

Epoch 00049: val_loss did not improve from 0.00895
Epoch 50/150
#Current LearningRate:  0.001

Epoch 00050: val_loss improved from 0.00895 to 0.00866, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models/TEST_Runtime_bestmodel.h5
Epoc

Epoch 71/150
#Current LearningRate:  0.001

Epoch 00071: val_loss did not improve from 0.00749
Epoch 72/150
#Current LearningRate:  0.001

Epoch 00072: val_loss did not improve from 0.00749
Epoch 73/150
#Current LearningRate:  0.001

Epoch 00073: val_loss did not improve from 0.00749
Epoch 74/150
#Current LearningRate:  0.001

Epoch 00074: val_loss did not improve from 0.00749
Epoch 75/150
#Current LearningRate:  0.001

Epoch 00075: val_loss did not improve from 0.00749
Epoch 76/150
#Current LearningRate:  0.001

Epoch 00076: val_loss did not improve from 0.00749
Epoch 77/150
#Current LearningRate:  0.001

Epoch 00077: val_loss improved from 0.00749 to 0.00739, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models/TEST_Runtime_bestmodel.h5
Epoch 78/150
#Current LearningRate:  0.001

Epoch 00078: val_loss improved from 0.00739 to 0.00722, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab

#Current LearningRate:  0.001

Epoch 00132: val_loss did not improve from 0.00693
Epoch 133/150
#Current LearningRate:  0.001

Epoch 00133: val_loss did not improve from 0.00693
Epoch 134/150
#Current LearningRate:  0.001

Epoch 00134: val_loss did not improve from 0.00693
Epoch 135/150
#Current LearningRate:  0.001

Epoch 00135: val_loss did not improve from 0.00693
Epoch 136/150
#Current LearningRate:  0.001

Epoch 00136: val_loss did not improve from 0.00693
Epoch 137/150
#Current LearningRate:  0.001

Epoch 00137: val_loss did not improve from 0.00693
Epoch 138/150
#Current LearningRate:  0.001

Epoch 00138: val_loss did not improve from 0.00693
Epoch 139/150
#Current LearningRate:  0.001

Epoch 00139: val_loss did not improve from 0.00693
Epoch 140/150
#Current LearningRate:  0.001

Epoch 00140: val_loss did not improve from 0.00693
Epoch 141/150
#Current LearningRate:  0.001

Epoch 00141: val_loss did not improve from 0.00693
Epoch 142/150
#Current LearningRate:  0.001

Epoch 001

### SingleMLP

In [9]:
# Fresh SingleMLP wrapper (original note: test lagged features).
sngleMLP = SingleMLP()

# Years delimiting the training, validation and test periods.
start_train_year, last_train_set_year = '2009', '2010'
validation_set_year, test_set_year = '2011', '2012'

# Sample of the input data for a single area: 2009-2014, first column only.
single_ts_series = ts_20largest.loc['2009':'2014'].iloc[:, 0]

# Build the full model for that single area. Per the original author's note,
# this returns the predictions for the validation year and the test year,
# with the model trained on the data of 2009 & 2010.
results_i = sngleMLP.create_full_pred_model(
    single_ts_series,
    start_train_year,
    last_train_set_year,
    validation_set_year,
    test_set_year,
    'TEST_singleMLP',
    verbose=1,
)

generate data..
start_validation_set_year:  2011
end_validation_set_year:  None
start_test_set_year:  2012
end_test_set_year:  None
Train/Test Split...
# dates adjusted:
start_validation_set_year:  2011
end_validation_set_year:  2011
start_test_set_year:  2012
end_test_set_year:  2012
Data is scaled...
Standardizing used...
Reshape data for MLP model...
X_train shape before modeling:  (16847, 172)
X_valid shape before modeling:  (8760, 172)
X_test shape before modeling:  (8784, 172)
y_train shape before modeling:  (16847,)
y_valid shape before modeling:  (8760,)
y_test shape before modeling:  (8784,)
scaler type:  StandardScaler(copy=True, with_mean=True, with_std=True)
#Dropout applied
#Clipping Norm applied
Train on 16847 samples, validate on 8760 samples
Epoch 1/150
#Current LearningRate:  0.001

Epoch 00001: val_loss improved from inf to 0.31581, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Mod

#Current LearningRate:  0.001

Epoch 00020: val_loss improved from 0.17324 to 0.17137, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models_MLP/TEST_Runtime_bestmodel.h5
Epoch 21/150
#Current LearningRate:  0.001

Epoch 00021: val_loss improved from 0.17137 to 0.17112, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models_MLP/TEST_Runtime_bestmodel.h5
Epoch 22/150
#Current LearningRate:  0.001

Epoch 00022: val_loss did not improve from 0.17112
Epoch 23/150
#Current LearningRate:  0.001

Epoch 00023: val_loss improved from 0.17112 to 0.16979, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models_MLP/TEST_Runtime_bestmodel.h5
Epoch 24/150
#Current LearningRate:  0.001

Epoch 00024: val_loss improved from 0.16979 to 0.16770, savin

#Current LearningRate:  0.001

Epoch 00072: val_loss did not improve from 0.14780
Epoch 73/150
#Current LearningRate:  0.001

Epoch 00073: val_loss did not improve from 0.14780
Epoch 74/150
#Current LearningRate:  0.001

Epoch 00074: val_loss did not improve from 0.14780
Epoch 75/150
#Current LearningRate:  0.001

Epoch 00075: val_loss improved from 0.14780 to 0.14767, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models_MLP/TEST_Runtime_bestmodel.h5
Epoch 76/150
#Current LearningRate:  0.001

Epoch 00076: val_loss improved from 0.14767 to 0.14704, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models_MLP/TEST_Runtime_bestmodel.h5
Epoch 77/150
#Current LearningRate:  0.001

Epoch 00077: val_loss did not improve from 0.14704
Epoch 78/150
#Current LearningRate:  0.001

Epoch 00078: val_loss did not improve from 0.14

#Current LearningRate:  0.001

Epoch 00101: val_loss did not improve from 0.14612
Epoch 102/150
#Current LearningRate:  0.001

Epoch 00102: val_loss did not improve from 0.14612
Epoch 103/150
#Current LearningRate:  0.001

Epoch 00103: val_loss did not improve from 0.14612
Epoch 104/150
#Current LearningRate:  0.001

Epoch 00104: val_loss did not improve from 0.14612
Epoch 105/150
#Current LearningRate:  0.001

Epoch 00105: val_loss did not improve from 0.14612
Epoch 106/150
#Current LearningRate:  0.001

Epoch 00106: val_loss did not improve from 0.14612
Epoch 107/150
#Current LearningRate:  0.001

Epoch 00107: val_loss did not improve from 0.14612
Epoch 108/150
#Current LearningRate:  0.001

Epoch 00108: val_loss did not improve from 0.14612
Epoch 109/150
#Current LearningRate:  0.001

Epoch 00109: val_loss did not improve from 0.14612
Epoch 110/150
#Current LearningRate:  0.001

Epoch 00110: val_loss did not improve from 0.14612
Epoch 111/150
#Current LearningRate:  0.001

Epoch 001

#Current LearningRate:  0.001

Epoch 00134: val_loss did not improve from 0.14612
Epoch 135/150
#Current LearningRate:  0.001

Epoch 00135: val_loss did not improve from 0.14612
Epoch 136/150
#Current LearningRate:  0.001

Epoch 00136: val_loss did not improve from 0.14612
Epoch 137/150
#Current LearningRate:  0.001

Epoch 00137: val_loss did not improve from 0.14612
Epoch 138/150
#Current LearningRate:  0.001

Epoch 00138: val_loss did not improve from 0.14612
Epoch 139/150
#Current LearningRate:  0.001

Epoch 00139: val_loss did not improve from 0.14612
Epoch 140/150
#Current LearningRate:  0.001

Epoch 00140: val_loss did not improve from 0.14612
Epoch 141/150
#Current LearningRate:  0.001

Epoch 00141: val_loss did not improve from 0.14612
Epoch 142/150
#Current LearningRate:  0.001

Epoch 00142: val_loss did not improve from 0.14612
Epoch 143/150
#Current LearningRate:  0.001

Epoch 00143: val_loss did not improve from 0.14612
Epoch 144/150
#Current LearningRate:  0.001

Epoch 001

### ComplexMLP

In [10]:
# The ComplexMLP processes the data of multiple areas at the same time.
cplxMLP = ComplexMLP()

# Years delimiting the training, validation and test periods.
start_train_year, last_train_set_year = '2009', '2010'
validation_set_year, test_set_year = '2011', '2012'

# Input data of the 20 busiest areas, restricted to 2009-2014.
multivar_ts_series = ts_20largest.loc['2009':'2014']

# Model creation. Per the original author's note, this returns the predictions
# for all 20 areas for the validation year and the test year, with the model
# trained on the data of 2009 & 2010.
results_i = cplxMLP.create_full_pred_model(
    multivar_ts_series,
    start_train_year,
    last_train_set_year,
    validation_set_year,
    test_set_year,
    'TEST_complexMLP',
    verbose=1,
)


generate data..
start_train_year:  2009
last_train_set_year:  2010
start_validation_set_year:  2011
start_test_set_year:  2012
end_validation_set_year:  None
end_test_set_year:  None
# adjusted dates..
start_train_year:  2009
last_train_set_year:  2010
start_validation_set_year:  2011
start_test_set_year:  2012
end_validation_set_year:  2011
end_test_set_year:  2012
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing

Epoch 18/150
#Current LearningRate:  0.001

Epoch 00018: val_loss did not improve from 0.22214
Epoch 19/150
#Current LearningRate:  0.001

Epoch 00019: val_loss did not improve from 0.22214
Epoch 20/150
#Current LearningRate:  0.001

Epoch 00020: val_loss did not improve from 0.22214
Epoch 21/150
#Current LearningRate:  0.001

Epoch 00021: val_loss did not improve from 0.22214
Epoch 22/150
#Current LearningRate:  0.001

Epoch 00022: val_loss did not improve from 0.22214
Epoch 23/150
#Current LearningRate:  0.001

Epoch 00023: val_loss did not improve from 0.22214
Epoch 24/150
#Current LearningRate:  0.001

Epoch 00024: val_loss improved from 0.22214 to 0.21992, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models_MLP/TEST_Runtime_MLP_bestmodel.h5
Epoch 25/150
#Current LearningRate:  0.001

Epoch 00025: val_loss did not improve from 0.21992
Epoch 26/150
#Current LearningRate:  0.001

Epoch 00026: val

#Current LearningRate:  0.001

Epoch 00046: val_loss did not improve from 0.21724
Epoch 47/150
#Current LearningRate:  0.001

Epoch 00047: val_loss improved from 0.21724 to 0.21708, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models_MLP/TEST_Runtime_MLP_bestmodel.h5
Epoch 48/150
#Current LearningRate:  0.001

Epoch 00048: val_loss did not improve from 0.21708
Epoch 49/150
#Current LearningRate:  0.001

Epoch 00049: val_loss did not improve from 0.21708
Epoch 50/150
#Current LearningRate:  0.001

Epoch 00050: val_loss did not improve from 0.21708
Epoch 51/150
#Current LearningRate:  0.001

Epoch 00051: val_loss improved from 0.21708 to 0.21654, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models_MLP/TEST_Runtime_MLP_bestmodel.h5
Epoch 52/150
#Current LearningRate:  0.001

Epoch 00052: val_loss did not improve f

#Current LearningRate:  0.001

Epoch 00076: val_loss did not improve from 0.21576
Epoch 77/150
#Current LearningRate:  0.001

Epoch 00077: val_loss did not improve from 0.21576
Epoch 78/150
#Current LearningRate:  0.001

Epoch 00078: val_loss did not improve from 0.21576
Epoch 79/150
#Current LearningRate:  0.001

Epoch 00079: val_loss did not improve from 0.21576
Epoch 80/150
#Current LearningRate:  0.001

Epoch 00080: val_loss did not improve from 0.21576
Epoch 81/150
#Current LearningRate:  0.001

Epoch 00081: val_loss did not improve from 0.21576
Epoch 82/150
#Current LearningRate:  0.001

Epoch 00082: val_loss did not improve from 0.21576
Epoch 83/150
#Current LearningRate:  0.001

Epoch 00083: val_loss did not improve from 0.21576
Epoch 84/150
#Current LearningRate:  0.001

Epoch 00084: val_loss did not improve from 0.21576
Epoch 85/150
#Current LearningRate:  0.001

Epoch 00085: val_loss did not improve from 0.21576
Epoch 86/150
#Current LearningRate:  0.001

Epoch 00086: val_lo

#Current LearningRate:  0.001

Epoch 00138: val_loss did not improve from 0.21512
Epoch 139/150
#Current LearningRate:  0.001

Epoch 00139: val_loss did not improve from 0.21512
Epoch 140/150
#Current LearningRate:  0.001

Epoch 00140: val_loss did not improve from 0.21512
Epoch 141/150
#Current LearningRate:  0.001

Epoch 00141: val_loss did not improve from 0.21512
Epoch 142/150
#Current LearningRate:  0.001

Epoch 00142: val_loss did not improve from 0.21512
Epoch 143/150
#Current LearningRate:  0.001

Epoch 00143: val_loss did not improve from 0.21512
Epoch 144/150
#Current LearningRate:  0.001

Epoch 00144: val_loss did not improve from 0.21512
Epoch 145/150
#Current LearningRate:  0.001

Epoch 00145: val_loss did not improve from 0.21512
Epoch 146/150
#Current LearningRate:  0.001

Epoch 00146: val_loss did not improve from 0.21512
Epoch 147/150
#Current LearningRate:  0.001

Epoch 00147: val_loss did not improve from 0.21512
Epoch 148/150
#Current LearningRate:  0.001

Epoch 001

RMSE per TS 0 : model: results_2012 : 70.1774741920154
RMSE per TS 1 : model: results_2012 : 87.31683982366056
RMSE per TS 2 : model: results_2012 : 76.78499772570444
RMSE per TS 3 : model: results_2012 : 75.74667501110457
RMSE per TS 4 : model: results_2012 : 63.31250273922924
RMSE per TS 5 : model: results_2012 : 75.89812318904372
RMSE per TS 6 : model: results_2012 : 58.07929370266036
RMSE per TS 7 : model: results_2012 : 64.11859579366421
RMSE per TS 8 : model: results_2012 : 62.62450275173861
RMSE per TS 9 : model: results_2012 : 75.23100962884303
RMSE per TS 10 : model: results_2012 : 69.73150467629065
RMSE per TS 11 : model: results_2012 : 50.98474740620433
RMSE per TS 12 : model: results_2012 : 56.00039055927089
RMSE per TS 13 : model: results_2012 : 51.07507733833642
RMSE per TS 14 : model: results_2012 : 50.178591708094004
RMSE per TS 15 : model: results_2012 : 48.91410580740934
RMSE per TS 16 : model: results_2012 : 44.91928564543685
RMSE per TS 17 : model: results_2012 : 52

### MultivarLSTM

In [6]:
# Multivariate LSTM with its default feature creation.
multLSTM = MultivariateLSTM()

# Years delimiting the training, validation and test periods.
start_train_year, last_train_set_year = '2009', '2010'
validation_set_year, test_set_year = '2011', '2012'

# Input data of the 20 busiest areas, restricted to 2009-2014.
multivar_ts_series = ts_20largest.loc['2009':'2014']

# Build the full model. Per the original author's note, this returns the
# predictions for all 20 areas for the validation year and the test year,
# with the model trained on the data of 2009 & 2010.
results_i = multLSTM.create_full_pred_model(
    multivar_ts_series,
    start_train_year,
    last_train_set_year,
    validation_set_year,
    test_set_year,
    'TEST_multivarLSTM',
    verbose=1,
)

start_validation_set_year  2011
start_test_set_year  2012
end_validation_set_year  None
end_test_set_year  None
generate data..
st_valid_year:  2011
end_valid_year:  2011
st_test_year:  2012
end_test_year:  2012
create data of area  237
create data of area  161
create data of area  230
create data of area  79
create data of area  236
create data of area  162
create data of area  170
create data of area  234
create data of area  48
create data of area  186
create data of area  142
create data of area  107
create data of area  163
create data of area  68
create data of area  239
create data of area  164
create data of area  141
create data of area  249
create data of area  138
create data of area  90
Data is scaled...
MinMax-Scaling used...
Data is scaled...
MinMax-Scaling used...
Data is scaled...
MinMax-Scaling used...
Data is scaled...
MinMax-Scaling used...
Data is scaled...
MinMax-Scaling used...
Data is scaled...
MinMax-Scaling used...
Data is scaled...
MinMax-Scaling used...
Data 

#Current LearningRate:  0.001

Epoch 00034: val_loss improved from 0.00999 to 0.00980, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models/TEST_Runtime_bestmodel.h5
Epoch 35/150
#Current LearningRate:  0.001

Epoch 00035: val_loss improved from 0.00980 to 0.00973, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models/TEST_Runtime_bestmodel.h5
Epoch 36/150
#Current LearningRate:  0.001

Epoch 00036: val_loss improved from 0.00973 to 0.00972, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models/TEST_Runtime_bestmodel.h5
Epoch 37/150
#Current LearningRate:  0.001

Epoch 00037: val_loss did not improve from 0.00972
Epoch 38/150
#Current LearningRate:  0.001

Epoch 00038: val_loss did not improve from 0.00972
Epoch 39/150
#Current 

Epoch 57/150
#Current LearningRate:  0.001

Epoch 00057: val_loss did not improve from 0.00917
Epoch 58/150
#Current LearningRate:  0.001

Epoch 00058: val_loss did not improve from 0.00917
Epoch 59/150
#Current LearningRate:  0.001

Epoch 00059: val_loss improved from 0.00917 to 0.00909, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models/TEST_Runtime_bestmodel.h5
Epoch 60/150
#Current LearningRate:  0.001

Epoch 00060: val_loss improved from 0.00909 to 0.00902, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models/TEST_Runtime_bestmodel.h5
Epoch 61/150
#Current LearningRate:  0.001

Epoch 00061: val_loss did not improve from 0.00902
Epoch 62/150
#Current LearningRate:  0.001

Epoch 00062: val_loss did not improve from 0.00902
Epoch 63/150
#Current LearningRate:  0.001

Epoch 00063: val_loss did not improve from

Epoch 84/150
#Current LearningRate:  0.001

Epoch 00084: val_loss did not improve from 0.00872
Epoch 85/150
#Current LearningRate:  0.001

Epoch 00085: val_loss did not improve from 0.00872
Epoch 86/150
#Current LearningRate:  0.001

Epoch 00086: val_loss did not improve from 0.00872
Epoch 87/150
#Current LearningRate:  0.001

Epoch 00087: val_loss improved from 0.00872 to 0.00870, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models/TEST_Runtime_bestmodel.h5
Epoch 88/150
#Current LearningRate:  0.001

Epoch 00088: val_loss improved from 0.00870 to 0.00868, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models/TEST_Runtime_bestmodel.h5
Epoch 89/150
#Current LearningRate:  0.001

Epoch 00089: val_loss did not improve from 0.00868
Epoch 90/150
#Current LearningRate:  0.001

Epoch 00090: val_loss improved from 0.0086

#Current LearningRate:  0.001

Epoch 00111: val_loss did not improve from 0.00846
Epoch 112/150
#Current LearningRate:  0.001

Epoch 00112: val_loss improved from 0.00846 to 0.00845, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models/TEST_Runtime_bestmodel.h5
Epoch 113/150
#Current LearningRate:  0.001

Epoch 00113: val_loss did not improve from 0.00845
Epoch 114/150
#Current LearningRate:  0.001

Epoch 00114: val_loss improved from 0.00845 to 0.00843, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models/TEST_Runtime_bestmodel.h5
Epoch 115/150
#Current LearningRate:  0.001

Epoch 00115: val_loss did not improve from 0.00843
Epoch 116/150
#Current LearningRate:  0.001

Epoch 00116: val_loss improved from 0.00843 to 0.00843, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/resul

#Current LearningRate:  0.001

Epoch 00138: val_loss did not improve from 0.00825
Epoch 139/150
#Current LearningRate:  0.001

Epoch 00139: val_loss did not improve from 0.00825
Epoch 140/150
#Current LearningRate:  0.001

Epoch 00140: val_loss improved from 0.00825 to 0.00824, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models/TEST_Runtime_bestmodel.h5
Epoch 141/150
#Current LearningRate:  0.001

Epoch 00141: val_loss did not improve from 0.00824
Epoch 142/150
#Current LearningRate:  0.001

Epoch 00142: val_loss did not improve from 0.00824
Epoch 143/150
#Current LearningRate:  0.001

Epoch 00143: val_loss did not improve from 0.00824
Epoch 144/150
#Current LearningRate:  0.001

Epoch 00144: val_loss did not improve from 0.00824
Epoch 145/150
#Current LearningRate:  0.001

Epoch 00145: val_loss did not improve from 0.00824
Epoch 146/150
#Current LearningRate:  0.001

Epoch 00146: val_loss did not

RMSE per TS 19 for model: results_2012: 45.46370899216406
Avg.RMSE for multivariate model: results_2012: 67.18339318370815


In [11]:
# Multivariate LSTM that uses additional lagged features; per the original
# note, these features are created based on the time series data.
multLSTM_encod = MultivariateLSTM(use_features_per_lag_flag=True)

# Years delimiting the training, validation and test periods.
start_train_year, last_train_set_year = '2009', '2010'
validation_set_year, test_set_year = '2011', '2012'

# Input data of the 20 busiest areas, restricted to 2009-2014.
multivar_ts_series = ts_20largest.loc['2009':'2014']

# Build the full model.
results_i = multLSTM_encod.create_full_pred_model(
    multivar_ts_series,
    start_train_year,
    last_train_set_year,
    validation_set_year,
    test_set_year,
    'TEST_MultivarLSTM',
    verbose=1,
)




start_validation_set_year  2011
start_test_set_year  2012
end_validation_set_year  None
end_test_set_year  None
generate data..
st_valid_year:  2011
end_valid_year:  2011
st_test_year:  2012
end_test_year:  2012
create data of area  237
get lag encodings of area  237
create data of area  161
get lag encodings of area  161
create data of area  230
get lag encodings of area  230
create data of area  79
get lag encodings of area  79
create data of area  236
get lag encodings of area  236
create data of area  162
get lag encodings of area  162
create data of area  170
get lag encodings of area  170
create data of area  234
get lag encodings of area  234
create data of area  48
get lag encodings of area  48
create data of area  186
get lag encodings of area  186
create data of area  142
get lag encodings of area  142
create data of area  107
get lag encodings of area  107
create data of area  163
get lag encodings of area  163
create data of area  68
get lag encodings of area  68
create dat

Epoch 13/150
#Current LearningRate:  0.001

Epoch 00013: val_loss did not improve from 0.01159
Epoch 14/150
#Current LearningRate:  0.001

Epoch 00014: val_loss improved from 0.01159 to 0.01117, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models/TEST_Runtime_bestmodel.h5
Epoch 15/150
#Current LearningRate:  0.001

Epoch 00015: val_loss improved from 0.01117 to 0.01108, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models/TEST_Runtime_bestmodel.h5
Epoch 16/150
#Current LearningRate:  0.001

Epoch 00016: val_loss improved from 0.01108 to 0.01093, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models/TEST_Runtime_bestmodel.h5
Epoch 17/150
#Current LearningRate:  0.001

Epoch 00017: val_loss improved from 0.01093 to 0.01069, savi

#Current LearningRate:  0.001

Epoch 00033: val_loss improved from 0.00965 to 0.00956, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models/TEST_Runtime_bestmodel.h5
Epoch 34/150
#Current LearningRate:  0.001

Epoch 00034: val_loss did not improve from 0.00956
Epoch 35/150
#Current LearningRate:  0.001

Epoch 00035: val_loss did not improve from 0.00956
Epoch 36/150
#Current LearningRate:  0.001

Epoch 00036: val_loss did not improve from 0.00956
Epoch 37/150
#Current LearningRate:  0.001

Epoch 00037: val_loss improved from 0.00956 to 0.00953, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models/TEST_Runtime_bestmodel.h5
Epoch 38/150
#Current LearningRate:  0.001

Epoch 00038: val_loss did not improve from 0.00953
Epoch 39/150
#Current LearningRate:  0.001

Epoch 00039: val_loss improved from 0.00953 to 0.00935,

#Current LearningRate:  0.001

Epoch 00083: val_loss did not improve from 0.00852
Epoch 84/150
#Current LearningRate:  0.001

Epoch 00084: val_loss did not improve from 0.00852
Epoch 85/150
#Current LearningRate:  0.001

Epoch 00085: val_loss did not improve from 0.00852
Epoch 86/150
#Current LearningRate:  0.001

Epoch 00086: val_loss improved from 0.00852 to 0.00849, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models/TEST_Runtime_bestmodel.h5
Epoch 87/150
#Current LearningRate:  0.001

Epoch 00087: val_loss did not improve from 0.00849
Epoch 88/150
#Current LearningRate:  0.001

Epoch 00088: val_loss improved from 0.00849 to 0.00847, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models/TEST_Runtime_bestmodel.h5
Epoch 89/150
#Current LearningRate:  0.001

Epoch 00089: val_loss did not improve from 0.00847
Epoc

Epoch 111/150
#Current LearningRate:  0.001

Epoch 00111: val_loss did not improve from 0.00824
Epoch 112/150
#Current LearningRate:  0.001

Epoch 00112: val_loss did not improve from 0.00824
Epoch 113/150
#Current LearningRate:  0.001

Epoch 00113: val_loss did not improve from 0.00824
Epoch 114/150
#Current LearningRate:  0.001

Epoch 00114: val_loss improved from 0.00824 to 0.00822, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models/TEST_Runtime_bestmodel.h5
Epoch 115/150
#Current LearningRate:  0.001

Epoch 00115: val_loss did not improve from 0.00822
Epoch 116/150
#Current LearningRate:  0.001

Epoch 00116: val_loss did not improve from 0.00822
Epoch 117/150
#Current LearningRate:  0.001

Epoch 00117: val_loss did not improve from 0.00822
Epoch 118/150
#Current LearningRate:  0.001

Epoch 00118: val_loss did not improve from 0.00822
Epoch 119/150
#Current LearningRate:  0.001

Epoch 00119: va

#Current LearningRate:  0.001

Epoch 00139: val_loss improved from 0.00810 to 0.00808, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models/TEST_Runtime_bestmodel.h5
Epoch 140/150
#Current LearningRate:  0.001

Epoch 00140: val_loss did not improve from 0.00808
Epoch 141/150
#Current LearningRate:  0.001

Epoch 00141: val_loss did not improve from 0.00808
Epoch 142/150
#Current LearningRate:  0.001

Epoch 00142: val_loss improved from 0.00808 to 0.00807, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/results/Stacked_LSTM/Hyperparam_tuning_y2011/Best_Models/TEST_Runtime_bestmodel.h5
Epoch 143/150
#Current LearningRate:  0.001

Epoch 00143: val_loss did not improve from 0.00807
Epoch 144/150
#Current LearningRate:  0.001

Epoch 00144: val_loss improved from 0.00807 to 0.00804, saving model to /media/vincent/harddrive/ML-Projects_all/NY_Cab_Project/NY_Cab_Data/resul

RMSE per TS 13 for model: results_2012: 57.18100977484582
RMSE per TS 14 for model: results_2012: 52.20018582152766
RMSE per TS 15 for model: results_2012: 51.607301250153476
RMSE per TS 16 for model: results_2012: 47.051954189885755
RMSE per TS 17 for model: results_2012: 54.754109445745364
RMSE per TS 18 for model: results_2012: 91.45701827380273
RMSE per TS 19 for model: results_2012: 45.83150482173603
Avg.RMSE for multivariate model: results_2012: 66.3285582958363


## Loading of pre-trained model

In [None]:
# Load a pre-trained multivariate LSTM from disk and use it to generate
# predictions instead of training a new model.
# Requires `multivar_ts_series` and `load_pretrained_model_from_disk` to be
# defined by earlier cells of this notebook.

#create model instance:
multLSTM = MultivariateLSTM()

# Year boundaries handed to the model for the train / validation / test split.
start_train_year = '2009'
last_train_set_year = '2010'
validation_set_year = '2011'
test_set_year = '2012'


#update instance with model from disk:
prediction_model = load_pretrained_model_from_disk('MultivarLSTM')
multLSTM.load_model(prediction_model)

#make predictions with loaded model:
# Use the instance that was just given the loaded model. The previous code
# called `multiLSTM_model_new`, which is never defined in this cell and
# therefore fails (or silently uses stale kernel state) after a restart.
results_tuple = multLSTM.generate_data_get_predictions(multivar_ts_series, start_train_year, last_train_set_year,
                                                       validation_set_year, test_set_year, verbose=1)
