In [75]:
import os
import numpy as np 
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Persistence/ naive results

We just have to load them, because we did allpreparations in 2.Base_models_final.ipynb

In [163]:
# Load results
naive_cols_df = pd.read_csv('results/naive/metrics_naive_columns.csv', header=[0, 1], index_col=0)
naive_countries_df = pd.read_csv('results/naive/metrics_naive_countries.csv', header=[0, 1], index_col=0)

# ARIMA results

Averaged results per column.

In [166]:
# Load results
# Please note that results have multiindex, therefore we have to read them 
# in a right way
# preds_df = pd.read_csv('datasets/predictions_full.csv', header=[0, 1], index_col=0)
metrics_df = pd.read_csv('results/arima/metrics_full.csv', header=[0, 1], index_col=0)

# Pivot the DataFrame to convert from long to wide format
mean_values = pd.DataFrame(metrics_df.mean(axis=0))
arima_cols_df = mean_values.unstack(level=-1)

# Set column names for the MultiIndex
#df_wide.columns = pd.MultiIndex.from_tuples([('ARIMA', 'MSE'), ('ARIMA', 'MAE'), ('ARIMA', 'RMSE')], names=['Model', 'Metrics'])
arima_cols_df.columns = pd.MultiIndex.from_product([['ARIMA'], ['MSE', 'MAE', 'RMSE']], names=['Model', 'Metrics'])

Averaged metrics per country.

In [167]:
# Loop over multiindex dataframe to get the columns that start with country index
# and also loop over the metrics to average

top_5_countries = ['DE', 'GB', 'ES', 'FR', 'IT']
metrics = ['MAE', 'MSE', 'RMSE']

mse_values = {}
mae_values = {}
rmse_values = {}

for country in top_5_countries:
    for metric in metrics:
        columns = [(col, metric) for col in metrics_df.columns.levels[0] if col.startswith(country)]
        for col, _ in columns:
            values = metrics_df[col, metric].dropna().tolist()
            
            if country not in mse_values:
                mse_values[country] = []
            if country not in mae_values:
                mae_values[country] = []
            if country not in rmse_values:
                rmse_values[country] = []

            if values:
                if metric == 'MAE':
                    mae_values[country].extend(values)
                elif metric == 'MSE':
                    mse_values[country].extend(values)
                elif metric == 'RMSE':
                    rmse_values[country].extend(values)

# Calculate average values for each metric
average_mse_arima = {country: sum(values) / len(values) for country, values in mse_values.items()}
average_mae_arima = {country: sum(values) / len(values) for country, values in mae_values.items()}
average_rmse_arima = {country: sum(values) / len(values) for country, values in rmse_values.items()}

# Create DataFrames for ARIMA model
arima_countries_df = pd.DataFrame({'MSE': average_mse_arima, 'MAE': average_mae_arima})

# Add MultiIndex columns 
arima_countries_df.columns = pd.MultiIndex.from_product([['ARIMA'], arima_countries_df.columns])


# Informer results

Average results countries.

In [170]:
import re

# Change to your path
file_path = "/Users/valentyna/Documents/Master_thesis_new/results/Informer/result_long_term_forecast.txt"

# Read the file and split lines
with open(file_path, "r") as file:
    data = file.readlines()

# Create dictionaries to store MSE and MAE values for each country
mse_values = {}
mae_values = {}

# Iterate through lines in groups of three
for i in range(0, len(data), 3):
    # Extract country code from the line using regular expression
    match = re.search(r'long_term_forecast__24_([A-Z]{2})_Informer', data[i])
    if match:
        country = match.group(1)
        # Initialize lists for MSE and MAE values if not already present
        if country not in mse_values:
            mse_values[country] = []
        if country not in mae_values:
            mae_values[country] = []
        # Extract MSE value from the MSE line
        mse_value = re.search(r'mse:(.*?),', data[i+1])
        if mse_value:
            mse = float(mse_value.group(1).strip())
            mse_values[country].append(mse)
        # Extract MAE value from the MAE line
        mae_value = re.search(r'mae:(.*?)\n', data[i+1])
        if mae_value:
            mae = float(mae_value.group(1).strip())
            mae_values[country].append(mae)

# Calculate average MSE and MAE for each country
average_mse_informer = {country: sum(values) / len(values) for country, values in mse_values.items()}
average_mae_informer = {country: sum(values) / len(values) for country, values in mae_values.items()}

# Create DataFrames for Informer
informer_countries_df = pd.DataFrame({'MSE': average_mse_informer, 'MAE': average_mae_informer})

# Add MultiIndex columns for both models
informer_countries_df.columns = pd.MultiIndex.from_product([['Informer'], informer_countries_df.columns])

Average results per column. It is a bit harder than for countries, therefore I placed it in other order, than for models above.

In [157]:
top_5_countries = ['DE', 'GB', 'ES', 'FR', 'IT']

# Dictionaries to store MSE and MAE values for each column
mse_values_dict = {}
mae_values_dict = {}

for country in top_5_countries:
    columns = [col for col in metrics_df.columns.levels[0] if col.startswith(country)]
    
    for i, col in enumerate(columns):

        # Initialize lists to store MSE and MAE values for the current column
        mse_values = []
        mae_values = []
        
        # Iterate over experiment numbers
        for j in range(2):
            path = "/Users/valentyna/Documents/Master_thesis_new/results/Informer/long_term_forecast__24_"
            path_2 = "_Informer_custom_ftM_sl96_ll48_pl24_dm512_nh8_el2_dl5_df2048_fc5_ebtimeF_dtTrue_Exp_"
            
            exp_number = j
            path_full = path + country + path_2 + str(exp_number)

            pred = np.load(path_full + "/pred.npy")[:, :, i]
            true = np.load(path_full + "/true.npy")[:, :, i]

            #mse = mean_squared_error(true, pred)
            #mae = mean_absolute_error(true, pred)
            mse = mean_squared_error(pred, true)
            mae = mean_absolute_error(pred, true)

            # Append MSE and MAE values to the lists
            mse_values.append(mse)
            mae_values.append(mae)
        
        # Store MSE and MAE values for the current column
        mse_values_dict[col] = mse_values
        mae_values_dict[col] = mae_values


In [158]:
# Dictionary to store average MSE and MAE values for each column
average_values_dict = {}

# Calculate average MSE and MAE for each column
for column in mse_values_dict.keys():
    mse_values = mse_values_dict[column]
    mae_values = mae_values_dict[column]
    
    # Calculate average MSE and MAE
    average_mse = np.mean(mse_values)
    average_mae = np.mean(mae_values)
    
    # Store the average values in the dictionary
    average_values_dict[column] = {'MSE': average_mse, 'MAE': average_mae}

In [171]:
# Create a DataFrame with MultiIndex
informer_cols_df = pd.DataFrame.from_dict(average_values_dict, orient='index')

# Add a MultiIndex for columns
informer_cols_df.columns = pd.MultiIndex.from_product([['Informer'], ['MSE', 'MAE']], names=['Model', 'Metrics'])

# Results of all models

In [175]:
# Concatenate the DataFrames for Columns dataframes
df_cols = pd.concat([naive_cols_df.iloc[:, :2], arima_cols_df.iloc[:, :2], informer_cols_df], axis=1)

df_cols

Model,Naive,Naive,ARIMA,ARIMA,Informer,Informer
Metrics,MSE,MAE,MSE,MAE,MSE,MAE
DE_load_actual_entsoe_transparency,0.406823,0.383997,0.471867,0.361086,0.120858,0.249314
DE_solar_generation_actual,0.175201,0.144906,0.188723,0.112712,0.151555,0.226218
DE_wind_generation_actual,0.809237,1.146706,0.651457,0.836964,0.644801,0.582968
DE_wind_offshore_generation_actual,1.241636,2.582417,0.893363,1.620248,1.185348,0.849587
DE_wind_onshore_generation_actual,0.764757,1.072277,0.647845,0.795603,0.625351,0.565995
ES_load_actual_entsoe_transparency,0.356224,0.305231,0.331985,0.220536,0.110779,0.234938
ES_solar_generation_actual,0.244915,0.210993,0.187641,0.126489,0.603296,0.454738
ES_wind_onshore_generation_actual,0.7449,0.912025,0.455867,0.42036,0.544108,0.544694
FR_load_actual_entsoe_transparency,0.238754,0.14001,0.190246,0.072617,0.071693,0.187501
FR_solar_generation_actual,0.178166,0.138144,0.192786,0.112883,0.126935,0.213305


In [176]:
# Concatenate the DataFrames for Countries dataframes
df_countries = pd.concat([naive_countries_df.iloc[:, :2], arima_countries_df.iloc[:, :2], informer_countries_df], axis=1)

df_countries

Unnamed: 0_level_0,Naive,Naive,ARIMA,ARIMA,Informer,Informer
Unnamed: 0_level_1,MSE,MAE,MSE,MAE,MSE,MAE
DE,0.679531,1.066061,0.745322,0.570651,0.545583,0.494817
GB,0.714248,1.18794,0.757825,0.595858,0.689944,0.573123
ES,0.44868,0.476083,0.255795,0.325165,0.419394,0.411457
FR,0.465762,0.691664,0.511321,0.442731,0.397621,0.360572
IT,0.43934,0.521386,0.328593,0.331661,0.222173,0.284555


In [None]:
# Loop over multiindex dataframe to get the columns that start with country index
# and also loop over the metrics to average

top_5_countries = ['DE', 'GB', 'ES', 'FR', 'IT']
metrics = ['MAE', 'MSE', 'RMSE']

for country in top_5_countries:
    print(f"Country: {country}")
    country_columns = [(col, metric) for col, metric in metrics_df.columns if col.startswith(country)]
    for metric in metrics:
        metric_columns = [(col, metric) for col, m in country_columns if m == metric]
        if metric_columns:
            values_to_average = metrics_df[metric_columns].values.flatten()
            mean_value = values_to_average.mean()
            print(f"Mean {metric}: {mean_value}")
        else:
            print(f"No data for metric {metric} in country {country}")
    print()