In [1]:
import pandas as pd
import numpy as np

import math

In [2]:
df = pd.read_csv('data/lstm_results.csv')

In [3]:
df['error'] = df['forecast'] - df['Close']
df['abs_error'] = np.abs(df['forecast'] - df['Close'])

## Calculate ups

In [4]:

def actual_up(row):
    time = row['time']
    calc_df = df[df['time'] == time]

    time_adj = time - 1
    base = calc_df['Close'][time_adj * 63]

    if row.name % 63 == 0:
        if row.name == 0:
            diff = row['Close'] - 621.38 > 0
        else:
            close = df['Close'][time_adj * 63 - 63]
            diff = base - close > 0
    else:
        diff = row['Close'] - base > 0
    
    print(row.name, end=" ") if row.name % 5000 == 0 else None # Just to show progress (of 63000)
    
    return diff

df['actual_up'] = df.apply(lambda x: actual_up(x), axis=1)

0 5000 10000 15000 20000 25000 30000 35000 40000 45000 50000 55000 60000 

In [5]:
def forecast_up(row):
    time = row['time']
    calc_df = df[df['time'] == time]

    time_adj = time - 1

    if time_adj == 0:
        diff = row['forecast'] - 621.38 > 0
    else:
        base = calc_df['Close'][time_adj * 63]
        diff = row['forecast'] - base > 0

    print(row.name, end=" ") if row.name % 5000 == 0 else None # Just to show progress (of 63000)
    
    return diff

df['forecast_up'] = df.apply(lambda x: forecast_up(x), axis=1)

0 5000 10000 15000 20000 25000 30000 35000 40000 45000 50000 55000 60000 

In [6]:
# creating confusion column

def confusion(actual, forecast):
    if (actual and forecast):
        return 'TP'
    
    if (actual and not forecast):
        return 'FN'
    
    if (not actual and forecast):
        return 'FP'
    
    if (not actual and not forecast):
        return 'TN'
    
    return False

df['confusion'] = df.apply(lambda x: confusion(x['actual_up'], x['forecast_up']), axis=1)

In [12]:
df.head(64)

Unnamed: 0,time,Close,forecast,error,abs_error,actual_up,forecast_up,confusion
0,1.0,622.77,621.587524,-1.182476,1.182476,True,True,TP
1,1.0,618.70,621.805115,3.105115,3.105115,False,True,FP
2,1.0,617.12,622.214722,5.094722,5.094722,False,True,FP
3,1.0,621.28,622.782288,1.502288,1.502288,False,True,FP
4,1.0,622.31,623.443665,1.133665,1.133665,False,True,FP
...,...,...,...,...,...,...,...,...
59,1.0,671.08,635.086121,-35.993879,35.993879,True,True,TP
60,1.0,668.82,636.018921,-32.801079,32.801079,True,True,TP
61,1.0,668.02,637.005066,-31.014934,31.014934,True,True,TP
62,1.0,663.33,638.083069,-25.246931,25.246931,True,True,TP


## Creating cross evaluation scores for each of the 1000 periods

In [7]:
# Creating a dataframe with all the columns needed

cross_df = pd.DataFrame(columns=[
    "mape_1", 
    "mape_3",
    "mape_5",
    "mape_21",
    "mape_63",
    "rmse_1",
    "rmse_3",
    "rmse_5",
    "rmse_21",
    "rmse_63",
    'precision_1',
    'precision_3',
    'precision_5',
    'precision_21',
    'precision_63',
    'recall_1',
    'recall_3',
    'recall_5',
    'recall_21',
    'recall_63',
    'fscore_1',
    'fscore_3',
    'fscore_5',
    'fscore_21',
    'fscore_63',
])

In [8]:
# Cross evaluation function

def cross_evaluate(df, n_periods):
    df = df[ : n_periods]
    mape = ((df["abs_error"] / df["Close"]).sum() / n_periods) * 100
    rmse = math.sqrt(pow(df["error"].sum(), 2) / n_periods)

    tp = len(df[df['confusion'] == 'TP'])
    fp = len(df[df['confusion'] == 'FP'])
    fn = len(df[df['confusion'] == 'FN'])

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0 # if else för att undvika division by zero errror
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    fscore = (2*precision*recall)/(precision+recall) if (precision + recall > 0) else 0

    return mape, rmse, precision, recall, fscore

In [9]:
# For every 63 forecasts of the 1000 periods forecasted, create scores

forecast_len = 1000
for i in range(forecast_len):
    cross_merged_df = df[df['time'] == i+1] # since indexing starts at 1
    one = cross_evaluate(cross_merged_df, 1)
    three = cross_evaluate(cross_merged_df, 3)
    five = cross_evaluate(cross_merged_df, 5)
    twentyone = cross_evaluate(cross_merged_df, 21)
    sixtythree = cross_evaluate(cross_merged_df, 63)

    cross_df = cross_df.append({
        'mape_1': one[0],
        'mape_3': three[0],
        'mape_5': five[0],
        'mape_21': twentyone[0],
        'mape_63': sixtythree[0],
        'rmse_1': one[1],
        'rmse_3': three[1],
        'rmse_5': five[1],
        'rmse_21': twentyone[1],
        'rmse_63': sixtythree[1],
        'precision_1': one[2],
        'precision_3': three[2],
        'precision_5': five[2],
        'precision_21': twentyone[2],
        'precision_63': sixtythree[2],
        'recall_1': one[3],
        'recall_3': three[3],
        'recall_5': five[3],
        'recall_21': twentyone[3],
        'recall_63': sixtythree[3],
        'fscore_1': one[4],
        'fscore_3': three[4],
        'fscore_5': five[4],
        'fscore_21': twentyone[4],
        'fscore_63': sixtythree[4],
    }, ignore_index=True)

    step = i % 100
    if step == 0:
        print(i, end=" ")

print('done!')

0 100 200 300 400 500 600 700 800 900 done!


In [10]:
# Only the results when t=1

cross_df[:1].transpose()

Unnamed: 0,0
mape_1,0.189874
mape_3,0.505772
mape_5,0.388258
mape_21,0.887633
mape_63,2.925779
rmse_1,1.182476
rmse_3,4.051475
rmse_5,4.317093
rmse_21,20.974583
rmse_63,150.206292


In [11]:
n = cross_df.count()[0]
mean = cross_df.mean()
upper = cross_df.mean() + 1.64 * cross_df.std() / math.sqrt(n)
lower = cross_df.mean() - 1.64 * cross_df.std() / math.sqrt(n)

ci_df = pd.DataFrame(columns=['measure', 'mean', 'lower', 'upper'])

for i in range(25):
    ci_df = ci_df.append({
        'measure': cross_df.columns[i],
        'mean': mean[i],
        'lower': lower[i],
        'upper': upper[i]
    }, ignore_index=True)

ci_df

Unnamed: 0,measure,mean,lower,upper
0,mape_1,0.613545,0.571887,0.655204
1,mape_3,0.926777,0.871642,0.981913
2,mape_5,1.190527,1.117767,1.263287
3,mape_21,2.749462,2.582364,2.91656
4,mape_63,5.403585,5.187812,5.619358
5,rmse_1,4.965353,4.643807,5.286898
6,rmse_3,12.155956,11.402104,12.909808
7,rmse_5,19.816034,18.545149,21.08692
8,rmse_21,92.261077,86.397433,98.12472
9,rmse_63,322.016801,307.533696,336.499906
