In [4]:
import pandas as pd
import numpy as np
# Load the series from disk and keep both the raw column and its natural log.
# NOTE(review): 'EXUSEU' is presumably a US/Euro exchange-rate series - confirm against the CSV.
data = pd.read_csv("data_US_Europe.csv")
tsValues = data['EXUSEU']
import math
# Plain Python list of log-values; the later cells slice and index it positionally.
tsValuesLog = list(map(math.log, tsValues))

In [5]:
# Length of the initial window: the LSTM cells fit on slices of startPrediction
# observations, and forecasts are scored from index startPrediction + horizon - 1 onward.
startPrediction = 100

In [6]:
# Number of observations in the raw series.
totalData = len(tsValues)

In [7]:
def mean_error(pred,real):
    """Return the mean signed error, i.e. the average of ``pred[k] - real[k]``.

    Only the first ``len(pred)`` positions of ``real`` are read.
    """
    signed_diffs = []
    for k in range(len(pred)):
        signed_diffs.append(pred[k] - real[k])
    return np.mean(signed_diffs)
def mean_absolute_error(pred,real):
    """Return the mean absolute error, i.e. the average of ``|pred[k] - real[k]|``.

    Only the first ``len(pred)`` positions of ``real`` are read.
    """
    abs_diffs = list(map(lambda k: np.abs(pred[k] - real[k]), range(len(pred))))
    return np.mean(abs_diffs)
def root_mean_square_error(pred,real):
    """Return the root of the mean squared difference between ``pred`` and ``real``.

    Only the first ``len(pred)`` positions of ``real`` are read.
    """
    squared_diffs = []
    for k in range(len(pred)):
        squared_diffs.append(pow(pred[k] - real[k], 2))
    mean_squared = np.mean(squared_diffs)
    return np.sqrt(mean_squared)

In [8]:
# One empty results table per metric: rows are the forecast horizons,
# and each evaluated model later adds its own column.
PredictionOutputMAE, PredictionOutputRMSE = (
    pd.DataFrame(index=['monthly','3_month','6_month','12_month']) for _ in range(2)
)
# Evaluation constants, now measured against the log-series.
startPrediction, totalData = 100, len(tsValuesLog)

In [9]:
# Benchmark: random-walk forecast, where the prediction l steps ahead is simply
# the last observed log-value.
mae_, rmse_ = [], []
for l in (1, 3, 6, 12):
    # Zero padding keeps pred index-aligned with tsValuesLog; it is sliced off before scoring.
    pred = [0] * (startPrediction + l - 1)
    pred.extend(
        tsValuesLog[startPrediction - 1 + i]
        for i in range(totalData - startPrediction - l + 1)
    )
    scored_pred = pred[startPrediction + l - 1:]
    scored_real = tsValuesLog[startPrediction + l - 1:]
    mae_.append(mean_absolute_error(scored_pred, scored_real))
    rmse_.append(root_mean_square_error(scored_pred, scored_real))
PredictionOutputMAE['RandomWalk'], PredictionOutputRMSE['RandomWalk'] = mae_, rmse_

In [10]:
PredictionOutputMAE

Unnamed: 0,RandomWalk
monthly,0.013241
3_month,0.029604
6_month,0.04944
12_month,0.07547


In [15]:
#LSTM Model
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Activation
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
import random
#LSTMModel
class LstmModel:
    """Univariate LSTM forecaster trained on sliding windows of lagged values.

    ``fit`` min-max scales the last ``sample_num + feature_length_used`` points
    of the series, builds ``sample_num`` training pairs (``feature_length_used``
    consecutive values -> the value that follows them) and trains an
    ``LSTM -> Dense(1)`` network on them.  ``predict`` forecasts one step at a
    time, feeding every forecast back in as the newest lag.
    """

    def __init__(self, lstm_cells_per_layer_used=100, loss_used='mean_absolute_error', optimizer_used='adam', epochs_used=100, batch_size_used=5, random_seed_used=1, sample_num=5, feature_length_used=5):
        """Store the hyper-parameters; no network is built until ``fit``.

        Args:
            lstm_cells_per_layer_used: number of units in the LSTM layer.
            loss_used: loss passed to ``model.compile``.
            optimizer_used: optimizer passed to ``model.compile``.
            epochs_used: training epochs passed to ``model.fit``.
            batch_size_used: batch size passed to ``model.fit``.
            random_seed_used: seed applied to NumPy's global RNG.
            sample_num: requested number of training pairs.
            feature_length_used: requested number of lagged values per pair.
        """
        self.model_name = 'LSTM_{}_{}_Model'.format(sample_num, feature_length_used)
        self.lstm_cells_per_layer_used = lstm_cells_per_layer_used
        self.loss_used = loss_used
        self.optimizer_used = optimizer_used
        self.epochs_used = epochs_used
        self.batch_size_used = batch_size_used
        self.model = None

        # NOTE(review): only NumPy is seeded here; no TensorFlow seed is set,
        # so training is presumably not bit-reproducible - confirm.
        self.random_seed_used = random_seed_used
        np.random.seed(self.random_seed_used)
        self.scaler = MinMaxScaler(feature_range=(0, 1))

        self.data_ori = None
        self.sample_num = sample_num
        self.feature_length_used = feature_length_used

    def fit(self, data):
        """Train the network on the tail of ``data``.

        ``sample_num`` and ``feature_length_used`` are clipped (and stored back
        on the instance) so that ``sample_num + feature_length_used`` never
        exceeds ``len(data)``.  With fewer than two observations no network is
        trained and ``predict`` falls back to repeating the last value.

        Args:
            data: sequence of numeric observations, oldest first.

        Raises:
            Exception: if no training pair can be formed after clipping.
        """
        self.data_ori = data
        if len(self.data_ori) <= 1:
            # Validate before touching global Keras state: nothing to train on.
            self.model = None
            return

        # Imported locally so the class does not rely on a later notebook cell
        # having executed `import keras` before fit() is called.
        from keras import backend as keras_backend
        keras_backend.clear_session()

        self.sample_num = min(self.sample_num, len(self.data_ori))
        self.feature_length_used = min(self.feature_length_used, len(self.data_ori) - self.sample_num)

        if self.feature_length_used <= 0:
            # Give up one training pair so that at least one lag is available.
            self.sample_num -= 1
            self.feature_length_used = 1
            if self.sample_num <= 0:
                raise Exception('Insufficient data!')

        n_samples = self.sample_num
        n_lags = self.feature_length_used

        tail = np.array(self.data_ori)[-(n_samples + n_lags):].astype(np.float64)
        self.data = self.scaler.fit_transform(tail.reshape(-1, 1)).T[0]

        # Pair k: lags data[offset+k : offset+k+n_lags] -> label data[offset+k+n_lags].
        offset = len(self.data) - n_samples - n_lags
        x_train = np.array([self.data[offset + k: offset + k + n_lags] for k in range(n_samples)])
        y_train = np.array(self.data[offset + n_lags: offset + n_lags + n_samples])

        # Keras LSTM input is (samples, timesteps, features); the whole lag
        # window is presented as the features of a single timestep.
        x_train = np.reshape(x_train, (x_train.shape[0], 1, x_train.shape[1]))

        self.model = Sequential()
        self.model.add(LSTM(self.lstm_cells_per_layer_used, input_shape=(1, n_lags)))
        self.model.add(Dense(1))
        self.model.compile(loss=self.loss_used, optimizer=self.optimizer_used)
        self.model.fit(x_train, y_train, epochs=self.epochs_used, batch_size=self.batch_size_used, verbose=0)

    def predict(self, next_n_prediction):
        """Forecast the next ``next_n_prediction`` values in original units.

        Forecasts are produced one step at a time; each scaled forecast is fed
        back as the newest lag for the following step.  The fitted state
        (``self.data``) is left untouched, so repeated calls do not grow it.

        Args:
            next_n_prediction: number of future steps to forecast.

        Returns:
            A list of length ``next_n_prediction``.  Without a trained network
            it repeats the last observation, or ``nan`` when no data has been
            seen at all (including when ``fit`` was never called).
        """
        if self.model is None:
            if self.data_ori is None or len(self.data_ori) <= 0:
                return [np.nan, ] * next_n_prediction
            return [self.data_ori[-1], ] * next_n_prediction

        # Work on a private copy so forecasts never leak into the fitted data.
        history = np.array(self.data, dtype=np.float64)
        n_lags = self.feature_length_used

        pred = []
        for step in range(next_n_prediction):
            first = self.sample_num + step
            window = history[first: first + n_lags]
            x_test = np.reshape(window, (1, 1, n_lags))

            scaled_forecast = self.model.predict(x_test)
            history = np.append(history, np.float64(scaled_forecast[0][0]))

            forecast = self.scaler.inverse_transform(scaled_forecast)
            pred.append(forecast[0][0])

        return list(np.asarray(pred, dtype=np.float64))

In [16]:
import keras
import keras.backend

In [None]:
import time
# Rolling evaluation of LstmModel: for every lag count and every horizon l, a
# fresh model is fit on each window of startPrediction log-values and its
# l-step-ahead forecast is scored against the realised value.
for feature_len in [4,5,6,8,12]:
    mae_ = []
    rmse_ = []
    start = time.time()
    for l in [1,3,6,12]:
        # Zero padding keeps pred index-aligned with tsValuesLog; it is sliced off before scoring.
        pred = [0]*(startPrediction+l-1)
        for i in range(totalData-startPrediction-(l-1)):
            # samples + lags are tied to startPrediction so they always fill the training window.
            model = LstmModel(sample_num=startPrediction-feature_len,feature_length_used=feature_len)
            model.fit(tsValuesLog[i:i+startPrediction])
            # Only the final step of the l-step forecast is evaluated.
            pred.append(model.predict(l)[l-1])
        mae = mean_absolute_error(pred[startPrediction+l-1:],tsValuesLog[startPrediction+l-1:])
        rmse = root_mean_square_error(pred[startPrediction+l-1:],tsValuesLog[startPrediction+l-1:])
        print(mae,rmse)
        mae_.append(mae)
        rmse_.append(rmse)
    end = time.time()
    # Report the wall-clock time for this lag count (previously the `time` module itself was printed).
    print('elapsed seconds:', end - start)
    name = 'LSTM_'+str(startPrediction-feature_len)+str(feature_len)
    PredictionOutputMAE[name] = mae_
    PredictionOutputRMSE[name] = rmse_

0.015278606204430096 0.020929380210527704
0.037932804753973814 0.05027107238407102
0.06969158834238798 0.09108852076778536
0.12950792985699994 0.1537366024910648
<module 'time' (built-in)>
0.01615072013274373 0.021533850937123435
0.03978920532190126 0.05109157479051978
0.07054382499153568 0.08942311490216837
0.12831846260561244 0.1520163227684137
<module 'time' (built-in)>
0.015479430812700989 0.02083243850951799
0.037249515318987576 0.04864168573831697
0.0677071243053365 0.08822343744008848
0.11938177366583926 0.14459483159851702
<module 'time' (built-in)>
0.015123708206460067 0.020850393187502706


In [18]:
PredictionOutputMAE

Unnamed: 0,RandomWalk,LSTM_964,LSTM_955,LSTM_946,LSTM_928,LSTM_8812
monthly,0.013241,0.015279,0.016151,0.015479,0.015124,0.01585
3_month,0.029604,0.037933,0.039789,0.03725,0.03793,0.038583
6_month,0.04944,0.069692,0.070544,0.067707,0.066606,0.06712
12_month,0.07547,0.129508,0.128318,0.119382,0.115212,0.120025


In [19]:
PredictionOutputRMSE

Unnamed: 0,RandomWalk,LSTM_964,LSTM_955,LSTM_946,LSTM_928,LSTM_8812
monthly,0.017759,0.020929,0.021534,0.020832,0.02085,0.021419
3_month,0.038925,0.050271,0.051092,0.048642,0.049787,0.049496
6_month,0.062944,0.091089,0.089423,0.088223,0.086274,0.085459
12_month,0.099058,0.153737,0.152016,0.144595,0.139478,0.145362
