In [23]:
import pandas as pd
import numpy as np

import math

In [24]:
# Reading r_cross_val.csv

df = pd.read_csv('data/r_cross_val.csv')

In [25]:
df['error'] = df['forecast'] - df['Close']
df['abs_error'] = np.abs(df['forecast'] - df['Close'])

## Calculate ups

In [26]:

def actual_up(row):
    time = row['time']
    calc_df = df[df['time'] == time]

    time_adj = time - 1
    base = calc_df['Close'][time_adj * 63]

    if row.name % 63 == 0:
        if row.name == 0:
            diff = row['Close'] - 621.38 > 0
        else:
            close = df['Close'][time_adj * 63 - 63]
            diff = base - close > 0
    else:
        diff = row['Close'] - base > 0
    
    print(row.name, end=" ") if row.name % 5000 == 0 else None # Just to show progress (of 63000)
    
    return diff

df['actual_up'] = df.apply(lambda x: actual_up(x), axis=1)

0 5000 10000 15000 20000 25000 30000 35000 40000 45000 50000 55000 60000 

In [27]:
def forecast_up(row):
    time = row['time']
    calc_df = df[df['time'] == time]

    time_adj = time - 1

    if time_adj == 0:
        diff = row['forecast'] - 621.38 > 0
    else:
        base = calc_df['Close'][time_adj * 63]
        diff = row['forecast'] - base > 0

    print(row.name, end=" ") if row.name % 5000 == 0 else None # Just to show progress (of 63000)
    
    return diff

df['forecast_up'] = df.apply(lambda x: forecast_up(x), axis=1)

0 5000 10000 15000 20000 25000 30000 35000 40000 45000 50000 55000 60000 

In [28]:
df.head(64)

Unnamed: 0,time,Close,forecast,lower,upper,error,abs_error,actual_up,forecast_up
0,1,622.77,622.745083,614.850489,630.741043,-0.024917,0.024917,True,True
1,1,618.70,623.922645,607.986288,640.276720,5.222645,5.222645,False,True
2,1,617.12,624.955354,600.876362,649.999267,7.835354,7.835354,False,True
3,1,621.28,625.876251,593.586507,659.922484,4.596251,4.596251,False,True
4,1,622.31,626.710896,586.166551,670.059638,4.400896,4.400896,False,True
...,...,...,...,...,...,...,...,...,...
59,1,671.08,658.205370,251.688108,1721.314181,-12.874630,12.874630,True,True
60,1,668.82,658.773490,247.648335,1752.414414,-10.046510,10.046510,True,True
61,1,668.02,659.342101,243.671832,1784.088057,-8.677899,8.677899,True,True
62,1,663.33,659.911202,239.757723,1816.345223,-3.418798,3.418798,True,True


In [29]:
# creating confusion column

def confusion(actual, forecast):
    if (actual and forecast):
        return 'TP'
    
    if (actual and not forecast):
        return 'FN'
    
    if (not actual and forecast):
        return 'FP'
    
    if (not actual and not forecast):
        return 'TN'
    
    return False

df['confusion'] = df.apply(lambda x: confusion(x['actual_up'], x['forecast_up']), axis=1)

## Creating cross evaluation scores for each of the 1000 periods

In [30]:
# Creating a dataframe with all the columns needed

cross_df = pd.DataFrame(columns=[
    "mape_1", 
    "mape_3",
    "mape_5",
    "mape_21",
    "mape_63",
    "rmse_1",
    "rmse_3",
    "rmse_5",
    "rmse_21",
    "rmse_63",
    'precision_1',
    'precision_3',
    'precision_5',
    'precision_21',
    'precision_63',
    'recall_1',
    'recall_3',
    'recall_5',
    'recall_21',
    'recall_63',
    'fscore_1',
    'fscore_3',
    'fscore_5',
    'fscore_21',
    'fscore_63',
])

In [31]:
# Cross evaluation function

def cross_evaluate(df, n_periods):
    df = df[ : n_periods]
    mape = ((df["abs_error"] / df["Close"]).sum() / n_periods) * 100
    rmse = math.sqrt(pow(df["error"].sum(), 2) / n_periods)

    tp = len(df[df['confusion'] == 'TP'])
    fp = len(df[df['confusion'] == 'FP'])
    fn = len(df[df['confusion'] == 'FN'])

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0 # if else för att undvika division by zero errror
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    fscore = (2*precision*recall)/(precision+recall) if (precision + recall > 0) else 0

    return mape, rmse, precision, recall, fscore

In [32]:
# For every 63 forecasts of the 1000 periods forecasted, create scores

forecast_len = 1000
for i in range(forecast_len):
    cross_merged_df = df[df['time'] == i+1] # since indexing starts at 1
    one = cross_evaluate(cross_merged_df, 1)
    three = cross_evaluate(cross_merged_df, 3)
    five = cross_evaluate(cross_merged_df, 5)
    twentyone = cross_evaluate(cross_merged_df, 21)
    sixtythree = cross_evaluate(cross_merged_df, 63)

    cross_df = cross_df.append({
        'mape_1': one[0],
        'mape_3': three[0],
        'mape_5': five[0],
        'mape_21': twentyone[0],
        'mape_63': sixtythree[0],
        'rmse_1': one[1],
        'rmse_3': three[1],
        'rmse_5': five[1],
        'rmse_21': twentyone[1],
        'rmse_63': sixtythree[1],
        'precision_1': one[2],
        'precision_3': three[2],
        'precision_5': five[2],
        'precision_21': twentyone[2],
        'precision_63': sixtythree[2],
        'recall_1': one[3],
        'recall_3': three[3],
        'recall_5': five[3],
        'recall_21': twentyone[3],
        'recall_63': sixtythree[3],
        'fscore_1': one[4],
        'fscore_3': three[4],
        'fscore_5': five[4],
        'fscore_21': twentyone[4],
        'fscore_63': sixtythree[4],
    }, ignore_index=True)

    step = i % 100
    if step == 0:
        print(i, end=" ")

print('done!')

0 100 200 300 400 500 600 700 800 900 done!


In [33]:
# Only the results when t=1

cross_df[:1].transpose()

Unnamed: 0,0
mape_1,0.004001
mape_3,0.705933
mape_5,0.712958
mape_21,0.541432
mape_63,1.063077
rmse_1,0.024917
rmse_3,7.524653
rmse_5,9.852218
rmse_21,2.323509
rmse_63,43.548246


In [34]:
n = cross_df.count()[0]
mean = cross_df.mean()
upper = cross_df.mean() + 1.64 * cross_df.std() / math.sqrt(n)
lower = cross_df.mean() - 1.64 * cross_df.std() / math.sqrt(n)

ci_df = pd.DataFrame(columns=['measure', 'mean', 'lower', 'upper'])

for i in range(25):
    ci_df = ci_df.append({
        'measure': cross_df.columns[i],
        'mean': mean[i],
        'lower': lower[i],
        'upper': upper[i]
    }, ignore_index=True)

ci_df

Unnamed: 0,measure,mean,lower,upper
0,mape_1,0.606831,0.566263,0.647399
1,mape_3,0.91034,0.8582,0.96248
2,mape_5,1.159973,1.090985,1.22896
3,mape_21,2.571405,2.398827,2.743983
4,mape_63,5.015313,4.761545,5.26908
5,rmse_1,4.905379,4.592236,5.218521
6,rmse_3,11.898708,11.180433,12.616983
7,rmse_5,19.183296,17.964496,20.402096
8,rmse_21,85.884945,79.899139,91.87075
9,rmse_63,294.066071,278.023847,310.108295
