## NOTE: This should not be run as a separate file, will include in lstm2.ipynb later
This is just for testing and not screwing anything working up.

Remember:

- Changed len_forecasts to 100 instead of len(test)
- Ignored date transformation as we don't have to make graphs
- And only RMSE/MAPE

In [1]:
# https://machinelearningmastery.com/multi-step-time-series-forecasting-long-short-term-memory-networks-python/

import pandas as pd
from datetime import datetime, timedelta, date
import numpy as np

from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

import math
import matplotlib.pyplot as plt

np.random.seed(1337)

# Creating functions

In [2]:
# transform series into train and test sets for supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()

    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [("var%d(t-%d)" % (j + 1, i)) for j in range(n_vars)]

    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [("var%d(t)" % (j + 1)) for j in range(n_vars)]
        else:
            names += [("var%d(t+%d)" % (j + 1, i)) for j in range(n_vars)]

    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names

    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)

    return agg


In [3]:
# transform series into train and test sets for supervised learning
def prepare_data(series, n_test, n_lag, n_seq):
    # extract raw values
    raw_values = series.values

    # transform data to be stationary
    diff_values = raw_values
    diff_values = diff_values.reshape(len(diff_values), 1)

    # rescale values to -1, 1
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaled_values = scaler.fit_transform(diff_values)
    scaled_values = scaled_values.reshape(len(scaled_values), 1)

    # transform into supervised learning problem X, y
    supervised = series_to_supervised(scaled_values, n_lag, n_seq)
    supervised_values = supervised.values

    # split into train and test sets
    train, test = supervised_values[0:-n_test], supervised_values[-n_test:]
    return scaler, train, test

In [4]:
def fit_lstm(train, n_lag, n_seq, n_batch, nb_epoch, n_neurons):
    # reshape training into [samples, timesteps, features]
    X, y = train[:, 0:n_lag], train[:, n_lag:]

    X = X.reshape(X.shape[0], 1, X.shape[1])

    # design network
    model = Sequential()
    model.add(LSTM(n_neurons, batch_input_shape=(n_batch, X.shape[1], X.shape[2]), stateful=True))
    model.add(Dense(y.shape[1]))
    model.compile(loss="mean_squared_error", optimizer="adam")

    model.fit(X, y, epochs=nb_epoch, batch_size=n_batch, verbose=2, shuffle=False)

    return model

In [5]:
# make one forecast with an LSTM,
def forecast_lstm(model, X, n_batch):
    # reshape input pattern to [samples, timesteps, features]
    X = X.reshape(1, 1, len(X))

    # make forecast
    forecast = model.predict(X, batch_size=n_batch)

    # convert to array
    return [x for x in forecast[0, :]]

In [6]:
# evaluate the persistence model
def make_forecasts(model, n_batch, train, test, n_lag, n_seq):
    forecasts = list()
    forecast_len = 100

    print(f'Forecast x of {forecast_len}:', end=" ")
    for i in range(forecast_len):
        X, y = test[i, 0:n_lag], test[i, n_lag:]
        # make forecast
        forecast = forecast_lstm(model, X, n_batch)
        # store the forecast
        forecasts.append(forecast)

        # Printing current status in tens
        tens = i % 10
        if tens == 0:
            print(i, end=" ")

    return forecasts

In [7]:
# inverse data transform on forecasts
def inverse_transform(series, forecasts, scaler):
    inverted = list()
    for i in range(len(forecasts)):

        # create array from forecast
        forecast = np.array(forecasts[i])
        forecast = forecast.reshape(1, len(forecast))

        # invert scaling
        inv_scale = scaler.inverse_transform(forecast)
        inv_scale = inv_scale[0, :]

        inverted.append(inv_scale)

    return inverted

# Fitting and predicting

In [8]:
# load dataset
logreturns = "data/final.csv"
series = pd.read_csv(logreturns, usecols=["Exchange.Date", "logreturns"], header=0, index_col=0, squeeze=True)

# configure
n_lag = 1 # same as ARMA-GARCH
n_seq = 63  #  number of periods forecast
test_share = 0.25
n_test = int(len(series) * test_share)
n_epochs = 5
n_batch = 1
n_neurons = 50

print("Preparing data...")
scaler, train, test = prepare_data(series, n_test, n_lag, n_seq)

print("Fitting model...")
model = fit_lstm(train, n_lag, n_seq, n_batch, n_epochs, n_neurons)

print("Making forecasts...")
forecasts = make_forecasts(model, n_batch, train, test, n_lag, n_seq)

print("\nInverting forecasts...")
forecasts = inverse_transform(series, forecasts, scaler)
print("Done!")

Preparing data...
Fitting model...
Epoch 1/5
3360/3360 - 5s - loss: 0.0090
Epoch 2/5
3360/3360 - 3s - loss: 0.0078
Epoch 3/5
3360/3360 - 3s - loss: 0.0077
Epoch 4/5
3360/3360 - 3s - loss: 0.0077
Epoch 5/5
3360/3360 - 3s - loss: 0.0077
Making forecasts...
Forecast x of 100: 0 10 20 30 40 50 60 70 80 90 
Inverting forecasts...
Done!


# Cross Evaluating 100 periods

In [38]:
original_df = pd.read_csv("data/final.csv", usecols=["Exchange.Date", "logreturns", "Close"])
original_df.index = original_df['Exchange.Date']
original_df.drop('Exchange.Date', axis=1)

Unnamed: 0_level_0,Close,logreturns
Exchange.Date,Unnamed: 1_level_1,Unnamed: 2_level_1
37620,100.000000,0.000000
37623,100.995362,0.009904
37624,101.623083,0.006196
37627,101.623083,0.000000
37628,102.392342,0.007541
...,...,...
44235,1249.100000,0.005668
44236,1245.630000,-0.002782
44237,1243.380000,-0.001808
44238,1254.090000,0.008577


In [79]:
def evaluate(df, n_periods):
    n_periods = 62 if n_periods == 63 else n_periods
    df = df[-63:-63+n_periods]
    mape = ((df["abs_error"] / df["Close"]).sum() / n_periods) * 100
    rmse = math.sqrt(pow(df["error"].sum(), 2) / n_periods)
    return mape, rmse

cross_df = pd.DataFrame(columns=[
    "mape_1", 
    "mape_3",
    "mape_5",
    "mape_21",
    "mape_63",
    "rmse_1",
    "rmse_3",
    "rmse_5",
    "rmse_21",
    "rmse_63"
])

for i in range(len(forecasts)):
    train_df = original_df[:-n_test + i].copy()
    train_df["forecast"] = train_df["Close"]

    last_train = train_df["Close"].values[-1]
    price_forecasts = np.exp(np.cumsum(forecasts[i]) + math.log(last_train))

    merged_df = original_df[-n_test + i:-n_test + i + 63].copy()
    merged_df["forecast"] = price_forecasts

    merged_df["error"] = merged_df["forecast"] - merged_df["Close"]
    merged_df["abs_error"] = np.abs(merged_df["forecast"] - merged_df["Close"])

    one = evaluate(merged_df, 1)
    three = evaluate(merged_df, 3)
    five = evaluate(merged_df, 5)
    twentyone = evaluate(merged_df, 21)
    sixtythree = evaluate(merged_df, 63)

    cross_df = cross_df.append({
        'mape_1': one[0],
        'mape_3': three[0],
        'mape_5': five[0],
        'mape_21': twentyone[0],
        'mape_63': sixtythree[0],
        'rmse_1': one[1],
        'rmse_3': three[1],
        'rmse_5': five[1],
        'rmse_21': twentyone[1],
        'rmse_63': sixtythree[1],
    }, ignore_index=True)

cross_df.head()

Unnamed: 0,mape_1,mape_3,mape_5,mape_21,mape_63,rmse_1,rmse_3,rmse_5,rmse_21,rmse_63
0,0.174163,0.548679,0.452738,0.878127,3.019335,1.084636,4.623117,5.29843,19.232196,152.996531
1,0.69615,0.701604,0.485295,1.20145,3.89803,4.30708,7.515089,6.716697,28.399187,198.357708
2,0.319051,0.295229,0.366385,1.219771,3.181852,1.968928,0.897587,3.338171,34.673138,164.556947
3,0.631253,0.769837,0.853429,2.068941,4.745625,3.921846,8.303919,11.903305,60.231971,245.860423
4,0.132524,0.325776,0.527773,1.88871,4.970187,0.824709,3.522625,7.398902,55.09364,257.900757


In [78]:
cross_df.describe()

Unnamed: 0,mape_1,mape_3,mape_5,mape_21,mape_63,rmse_1,rmse_3,rmse_5,rmse_21,rmse_63
count,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
mean,0.452493,0.675881,0.830413,1.780169,5.1809,2.990469,7.125819,10.835464,49.289116,285.139219
std,0.379413,0.455349,0.510669,0.828867,1.651811,2.508202,5.56076,8.493985,30.302296,110.592964
min,0.001283,0.082064,0.209006,0.546482,1.286949,0.008662,0.163984,0.012091,0.303191,3.898016
25%,0.17381,0.338734,0.443037,1.188126,4.119238,1.133718,2.961218,3.540514,27.662235,210.652777
50%,0.349202,0.613664,0.727641,1.639635,5.185377,2.301006,6.265102,8.823601,45.244415,278.343059
75%,0.632679,0.860807,1.176574,2.150871,6.289932,4.002759,9.965476,17.338111,64.1179,365.43038
max,1.911459,2.92785,3.177644,5.041456,8.837445,12.921274,34.140031,47.918366,158.952991,503.228713
