In [None]:
import pandas as pd
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score
import numpy as np

# Load forecast data from CSV files
bagging_dt = pd.read_csv('../../results/bagging_dt.csv')
har = pd.read_csv('../../results/har.csv')
garch = pd.read_csv('../../results/garch.csv')
rf = pd.read_csv('../../results/rf.csv')
svr = pd.read_csv('../../results/svr.csv')
ewma = pd.read_csv('../../results/ewma.csv')

# Rename the "Predicted" column to the model name for each DataFrame
bagging_dt = bagging_dt.rename(columns={'Predicted': 'bagging_dt'})
har = har.rename(columns={'Predicted': 'har'})
garch = garch.rename(columns={'Predicted': 'garch'})
rf = rf.rename(columns={'Predicted': 'rf'})
svr = svr.rename(columns={'Predicted': 'svr'})
ewma = ewma.rename(columns={'Predicted': 'ewma'})

# Some of the risk group names are not standardised (Caps, lower case, etc.)
# We need to standardise them before merging the DataFrames
bagging_dt['Risk Group'] = bagging_dt['Risk Group'].str.lower()
har['Risk Group'] = har['Risk Group'].str.lower()
garch['Risk Group'] = garch['Risk Group'].str.lower()
rf['Risk Group'] = rf['Risk Group'].str.lower()
svr['Risk Group'] = svr['Risk Group'].str.lower()
ewma['Risk Group'] = ewma['Risk Group'].str.lower()


# Print the first date of all models for checking
print(har['Date'].iloc[0])
print(garch['Date'].iloc[0])
print(rf['Date'].iloc[0])
print(svr['Date'].iloc[0])
print(ewma['Date'].iloc[0])
print(bagging_dt['Date'].iloc[0])

# Start with one DataFrame and merge the others on the common columns
data = bagging_dt[['Date', 'Ticker', 'Risk Group', 'Frequency', 'Actual', 'bagging_dt']]

data = data.merge(har[['Date', 'Ticker', 'Risk Group', 'Frequency', 'har']],
                  on=['Date', 'Ticker', 'Risk Group', 'Frequency'], how='outer')
data = data.merge(garch[['Date', 'Ticker', 'Risk Group', 'Frequency', 'garch']],
                  on=['Date', 'Ticker', 'Risk Group', 'Frequency'], how='outer')
data = data.merge(rf[['Date', 'Ticker', 'Risk Group', 'Frequency', 'rf']],
                  on=['Date', 'Ticker', 'Risk Group', 'Frequency'], how='outer')
data = data.merge(svr[['Date', 'Ticker', 'Risk Group', 'Frequency', 'svr']],
                  on=['Date', 'Ticker', 'Risk Group', 'Frequency'], how='outer')
data = data.merge(ewma[['Date', 'Ticker', 'Risk Group', 'Frequency', 'ewma']],
                  on=['Date', 'Ticker', 'Risk Group', 'Frequency'], how='outer')


print(data.head())

2024-04-02 01:00:00+00:00
2024-04-02 01:00:00+00:00
2024-04-02 01:00:00+00:00
2024-04-02 01:00:00+00:00
2024-04-02 01:00:00+00:00
2024-04-02 01:00:00+00:00
                        Date    Ticker Risk Group Frequency     Actual  \
0  2024-04-02 01:00:00+00:00   BTC-USD        low    hourly -14.731732   
1  2024-04-02 01:00:00+00:00  DOGE-USD       high    hourly -10.180126   
2  2024-04-02 01:00:00+00:00   ETH-USD        low    hourly -13.544106   
3  2024-04-02 01:00:00+00:00   SOL-USD       high    hourly -13.904746   
4  2024-04-02 01:00:00+00:00   XRP-USD     medium    hourly  -9.037092   

   bagging_dt        har      garch         rf        svr       ewma  
0  -13.012559 -12.717092 -12.434889 -13.095386 -12.625901 -12.417476  
1  -11.402712 -11.096927 -10.314513 -11.573167 -10.983400 -10.155544  
2  -12.694841 -13.315415 -12.036654 -12.615214 -12.464753 -12.092227  
3  -11.667074 -11.123150 -11.039991 -11.701376 -11.148741 -11.010307  
4  -12.689666 -12.132799 -11.660224 -12.6965

In [None]:

def calculate_metrics(actual, predicted):
    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)
    mape = mean_absolute_percentage_error(actual, predicted)
    r2 = r2_score(actual, predicted)
    qlike = np.mean(actual / predicted - np.log(actual / predicted) - 1)
    return mse, rmse, mape, r2, qlike

# List of model names
model_names = ['bagging_dt', 'har', 'garch', 'rf', 'svr', 'ewma']

# Create a list to hold metric rows.
rows = []

# Group the data by Risk Group and Frequency.
grouped = data.groupby(['Risk Group', 'Frequency'])

for (risk, freq), group in grouped:
    # For each model, compute evaluation metrics within the group.
    for model in model_names:
        mse, rmse, mape, r2, qlike = calculate_metrics(group['Actual'], group[model])
        rows.append({
            'Risk Group': risk,
            'Frequency': freq,
            'Model': model,
            'RMSE': rmse,
            'MAPE': mape,
            'R2': r2,
            'QLIKE': qlike
        })

# Convert the list of rows into a DataFrame.
grouped_metrics_df = pd.DataFrame(rows)

print("Minimum and maximum RMSE:")
print(grouped_metrics_df.groupby(['Model'])['RMSE'].agg(['min', 'max']))
print("Minimum and Maximum R2:")
print(grouped_metrics_df.groupby(['Model'])['R2'].agg(['min', 'max']))

Minimum and maximum RMSE:
                 min       max
Model                         
bagging_dt  0.859443  2.485639
ewma        0.885295  2.553039
garch       0.884789  2.443261
har         0.897821  2.573997
rf          0.867560  2.488804
svr         0.851058  2.471022
Minimum and Maximum R2:
                 min       max
Model                         
bagging_dt  0.019736  0.388133
ewma       -0.059974  0.368939
garch       0.046368  0.327220
har        -0.007979  0.318631
rf          0.017238  0.387768
svr         0.023019  0.310761


In [3]:
# Extract EWMA RMSE for each Risk Group and Frequency
ewma_rmse = grouped_metrics_df[grouped_metrics_df['Model'] == 'ewma'][['Frequency','Risk Group', 'RMSE']]
ewma_rmse = ewma_rmse.rename(columns={'RMSE': 'ewma_RMSE'}).set_index(['Frequency','Risk Group'])

# Merge the EWMA RMSE back into the original DataFrame using the keys
grouped_metrics_df = grouped_metrics_df.merge(ewma_rmse, left_on=['Frequency','Risk Group'], right_index=True)

# Calculate the Relative RMSE by dividing RMSE by the benchmark ewma_RMSE
grouped_metrics_df['Relative RMSE'] = grouped_metrics_df['RMSE'] / grouped_metrics_df['ewma_RMSE']


In [4]:
# Order the Summary table: Risk Group, Frequency, Model, Relative RMSE, R2, MAPE, QLIKE
summary = grouped_metrics_df[['Frequency','Risk Group', 'Model', 'Relative RMSE', 'R2', 'MAPE', 'QLIKE']]

# Order the risk groups to be low, medium, high
summary['Risk Group'] = pd.Categorical(summary['Risk Group'], categories=['low', 'medium', 'high'], ordered=True)
summary = summary.sort_values(['Frequency', 'Risk Group', 'Relative RMSE'])

# Order the frequ to be hourly, 3hourly, daily
summary['Frequency'] = pd.Categorical(summary['Frequency'], categories=['hourly', '3hourly', 'daily'], ordered=True)
summary = summary.sort_values(['Frequency', 'Risk Group', 'Relative RMSE'])

# Sort it by Risk Group, Frequency, and Relative RMSE (from highest to lowest)
summary = summary.sort_values(['Frequency', 'Risk Group', 'Relative RMSE'], ascending=[True, True, False])
summary

summary.to_csv('../../results/evaluation/summary.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  summary['Risk Group'] = pd.Categorical(summary['Risk Group'], categories=['low', 'medium', 'high'], ordered=True)


## Best Models
This study will now identify the best models for each frequency and risk group

#### R2
In some papers, the lowest RMSE does not represent the highest R2. However in this paper, the lowest RMSE corresponded with the highest RMSE. So in this component, we will extract the highest R2

In [6]:
summary = summary.sort_values(['Frequency', 'Risk Group', 'R2'], ascending=[True, True, False])
best_models_r2 = summary.groupby(['Frequency','Risk Group']).first()
best_models_r2
best_models_r2.to_csv('../../results/evaluation/best_models_r2.csv')


  best_models_r2 = summary.groupby(['Frequency','Risk Group']).first()


Diebold-Mariano test

In [None]:
import sys
sys.path.insert(0, '../../')  # Ensure the module is in the path
from dm_test import dm_test  # Import the DM test function
import pandas as pd

# List of alternative models to compare (excluding the benchmark 'ewma')
models = ['bagging_dt', 'har', 'garch', 'rf', 'svr']

# Initialize a list to store DM test results
dm_results = []

# Group the DataFrame by 'Risk Group' and 'Frequency'
for (risk_group, freq), group in data.groupby(['Risk Group', 'Frequency']):
    # Extract actual values and the EWMA forecast as lists
    actual_lst = group['Actual'].tolist()
    ewma_lst = group['ewma'].tolist()
    
    # Loop over each alternative model and perform the DM test comparing its forecast against EWMA
    for model in models:
        model_pred_lst = group[model].tolist()
        try:
            # Run the DM test using h=1 and the "MSE" criterion
            dm_result = dm_test(actual_lst, model_pred_lst, ewma_lst, h=1, crit="MSE", power=2)
            
            # Determine recommendation based on significance and sign of DM statistic
            if dm_result.p_value < 0.05:
                if dm_result.DM < 0:
                    recommendation = "Alternate model should be used"
                else:
                    recommendation = "EWMA model should be used"
            else:
                recommendation = "No significant difference"
            
            # Append the results along with recommendation
            dm_results.append({
                'Risk Group': risk_group,
                'Frequency': freq,
                'Model': model,
                'DM Statistic': dm_result.DM,
                'p-value': dm_result.p_value,
                'Recommendation': recommendation
            })
        except Exception as e:
            print(f"Error for group {risk_group} {freq}, model {model}: {e}")

# Convert the results into a DataFrame and display it
dm_results_df = pd.DataFrame(dm_results)
print(dm_results_df)


   Risk Group Frequency       Model  DM Statistic       p-value  \
0        high   3hourly  bagging_dt      4.069463  4.778955e-05   
1        high   3hourly         har      7.053845  1.957712e-12   
2        high   3hourly       garch      6.413745  1.539905e-10   
3        high   3hourly          rf      4.423390  9.905995e-06   
4        high   3hourly         svr      6.097852  1.149130e-09   
5        high     daily  bagging_dt     -1.525301  1.276819e-01   
6        high     daily         har      0.980475  3.272251e-01   
7        high     daily       garch     -0.068204  9.456446e-01   
8        high     daily          rf     -0.953183  3.408598e-01   
9        high     daily         svr     -1.774776  7.641395e-02   
10       high    hourly  bagging_dt    -10.598774  3.668274e-26   
11       high    hourly         har     -6.645093  3.125613e-11   
12       high    hourly       garch    -17.788339  3.960827e-70   
13       high    hourly          rf    -10.223755  1.837676e-2