In [75]:
import os
import numpy as np 
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error

# ARIMA results

Averaged results per column.

In [161]:
# Load results
# Please note that results have multiindex, therefore we have to read them 
# in a right way
# preds_df = pd.read_csv('datasets/predictions_full.csv', header=[0, 1], index_col=0)
metrics_df = pd.read_csv('results/arima/metrics_full.csv', header=[0, 1], index_col=0)

# Pivot the DataFrame to convert from long to wide format
mean_values = pd.DataFrame(metrics_df.mean(axis=0))
df_wide_arima = mean_values.unstack(level=-1)

# Set column names for the MultiIndex
#df_wide.columns = pd.MultiIndex.from_tuples([('ARIMA', 'MSE'), ('ARIMA', 'MAE'), ('ARIMA', 'RMSE')], names=['Model', 'Metrics'])
df_wide_arima.columns = pd.MultiIndex.from_product([['ARIMA'], ['MSE', 'MAE', 'RMSE']], names=['Model', 'Metrics'])

df_wide_arima

Model,ARIMA,ARIMA,ARIMA
Metrics,MSE,MAE,RMSE
Column name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
DE_load_actual_entsoe_transparency,0.471867,0.361086,0.539805
DE_solar_generation_actual,0.188723,0.112712,0.299724
DE_wind_generation_actual,0.651457,0.836964,0.784611
DE_wind_offshore_generation_actual,0.893363,1.620248,1.106839
DE_wind_onshore_generation_actual,0.647845,0.795603,0.777313
ES_load_actual_entsoe_transparency,0.331985,0.220536,0.379486
ES_solar_generation_actual,0.187641,0.126489,0.284286
ES_wind_onshore_generation_actual,0.455867,0.42036,0.533582
FR_load_actual_entsoe_transparency,0.190246,0.072617,0.225709
FR_solar_generation_actual,0.192786,0.112883,0.31627


Averaged metrics per country.

In [67]:
# Loop over multiindex dataframe to get the columns that start with country index
# and also loop over the metrics to average

top_5_countries = ['DE', 'GB', 'ES', 'FR', 'IT']
metrics = ['MAE', 'MSE', 'RMSE']

mse_values = {}
mae_values = {}
rmse_values = {}

for country in top_5_countries:
    for metric in metrics:
        columns = [(col, metric) for col in metrics_df.columns.levels[0] if col.startswith(country)]
        for col, _ in columns:
            values = metrics_df[col, metric].dropna().tolist()
            
            if country not in mse_values:
                mse_values[country] = []
            if country not in mae_values:
                mae_values[country] = []
            if country not in rmse_values:
                rmse_values[country] = []

            if values:
                if metric == 'MAE':
                    mae_values[country].extend(values)
                elif metric == 'MSE':
                    mse_values[country].extend(values)
                elif metric == 'RMSE':
                    rmse_values[country].extend(values)

# Calculate average values for each metric
average_mse_arima = {country: sum(values) / len(values) for country, values in mse_values.items()}
average_mae_arima = {country: sum(values) / len(values) for country, values in mae_values.items()}
average_rmse_arima = {country: sum(values) / len(values) for country, values in rmse_values.items()}

# Print the results
for country in average_mse_arima:
    print(f"Country: {country}")
    print(f"Average MSE: {average_mse_arima[country]}")
    print(f"Average MAE: {average_mae_arima[country]}")
    print(f"Average RMSE: {average_rmse_arima[country]}")
    print()


Country: DE
Average MSE: 0.7453224891363754
Average MAE: 0.5706510840625352
Average RMSE: 0.7016585840192152

Country: GB
Average MSE: 0.7578253321729908
Average MAE: 0.5958583171212944
Average RMSE: 0.7253604939238645

Country: ES
Average MSE: 0.2557949224133751
Average MAE: 0.325164515764063
Average RMSE: 0.39911787644351054

Country: FR
Average MSE: 0.5113212249173066
Average MAE: 0.44273100850014946
Average RMSE: 0.5417099244159309

Country: IT
Average MSE: 0.3285929098632392
Average MAE: 0.3316613675610475
Average RMSE: 0.4068921940489003



# Informer results

In [68]:
import re

# Change to your path
file_path = "/Users/valentyna/Documents/Master_thesis_new/results/Informer/result_long_term_forecast.txt"

# Read the file and split lines
with open(file_path, "r") as file:
    data = file.readlines()

# Create dictionaries to store MSE and MAE values for each country
mse_values = {}
mae_values = {}

# Iterate through lines in groups of three
for i in range(0, len(data), 3):
    # Extract country code from the line using regular expression
    match = re.search(r'long_term_forecast__24_([A-Z]{2})_Informer', data[i])
    if match:
        country = match.group(1)
        # Initialize lists for MSE and MAE values if not already present
        if country not in mse_values:
            mse_values[country] = []
        if country not in mae_values:
            mae_values[country] = []
        # Extract MSE value from the MSE line
        mse_value = re.search(r'mse:(.*?),', data[i+1])
        if mse_value:
            mse = float(mse_value.group(1).strip())
            mse_values[country].append(mse)
        # Extract MAE value from the MAE line
        mae_value = re.search(r'mae:(.*?)\n', data[i+1])
        if mae_value:
            mae = float(mae_value.group(1).strip())
            mae_values[country].append(mae)

# Calculate average MSE and MAE for each country
average_mse_informer = {country: sum(values) / len(values) for country, values in mse_values.items()}
average_mae_informer = {country: sum(values) / len(values) for country, values in mae_values.items()}

# Print the results
for country in average_mse_informer:
    print(f"Country: {country}")
    print(f"Average MSE: {average_mse_informer[country]}")
    print(f"Average MAE: {average_mae_informer[country]}")
    print()



Country: DE
Average MSE: 0.5455829799175262
Average MAE: 0.4948166310787201

Country: GB
Average MSE: 0.6899437606334686
Average MAE: 0.5731228291988373

Country: ES
Average MSE: 0.41939422488212585
Average MAE: 0.4114568680524826

Country: FR
Average MSE: 0.397621214389801
Average MAE: 0.3605721741914749

Country: IT
Average MSE: 0.22217324376106262
Average MAE: 0.2845545709133148



In [70]:
# Create DataFrames for Informer and ARIMA models
df_informer = pd.DataFrame({'MSE': average_mse_informer, 'MAE': average_mae_informer})
df_arima = pd.DataFrame({'MSE': average_mse_arima, 'MAE': average_mae_arima})

# Add MultiIndex columns for both models
df_informer.columns = pd.MultiIndex.from_product([['Informer'], df_informer.columns])
df_arima.columns = pd.MultiIndex.from_product([['ARIMA'], df_arima.columns])

# Concatenate the DataFrames
df = pd.concat([df_informer, df_arima], axis=1)

df


Unnamed: 0_level_0,Informer,Informer,ARIMA,ARIMA
Unnamed: 0_level_1,MSE,MAE,MSE,MAE
DE,0.545583,0.494817,0.745322,0.570651
GB,0.689944,0.573123,0.757825,0.595858
ES,0.419394,0.411457,0.255795,0.325165
FR,0.397621,0.360572,0.511321,0.442731
IT,0.222173,0.284555,0.328593,0.331661


# Columns results comparison

In [157]:
top_5_countries = ['DE', 'GB', 'ES', 'FR', 'IT']

# Dictionaries to store MSE and MAE values for each column
mse_values_dict = {}
mae_values_dict = {}

for country in top_5_countries:
    columns = [col for col in metrics_df.columns.levels[0] if col.startswith(country)]
    
    for i, col in enumerate(columns):

        # Initialize lists to store MSE and MAE values for the current column
        mse_values = []
        mae_values = []
        
        # Iterate over experiment numbers
        for j in range(2):
            path = "/Users/valentyna/Documents/Master_thesis_new/results/Informer/long_term_forecast__24_"
            path_2 = "_Informer_custom_ftM_sl96_ll48_pl24_dm512_nh8_el2_dl5_df2048_fc5_ebtimeF_dtTrue_Exp_"
            
            exp_number = j
            path_full = path + country + path_2 + str(exp_number)

            pred = np.load(path_full + "/pred.npy")[:, :, i]
            true = np.load(path_full + "/true.npy")[:, :, i]

            #mse = mean_squared_error(true, pred)
            #mae = mean_absolute_error(true, pred)
            mse = mean_squared_error(pred, true)
            mae = mean_absolute_error(pred, true)

            # Append MSE and MAE values to the lists
            mse_values.append(mse)
            mae_values.append(mae)
        
        # Store MSE and MAE values for the current column
        mse_values_dict[col] = mse_values
        mae_values_dict[col] = mae_values


In [158]:
# Dictionary to store average MSE and MAE values for each column
average_values_dict = {}

# Calculate average MSE and MAE for each column
for column in mse_values_dict.keys():
    mse_values = mse_values_dict[column]
    mae_values = mae_values_dict[column]
    
    # Calculate average MSE and MAE
    average_mse = np.mean(mse_values)
    average_mae = np.mean(mae_values)
    
    # Store the average values in the dictionary
    average_values_dict[column] = {'MSE': average_mse, 'MAE': average_mae}

In [159]:
# Create a DataFrame with MultiIndex
df_informer = pd.DataFrame.from_dict(average_values_dict, orient='index')

# Add a MultiIndex for columns
df_informer.columns = pd.MultiIndex.from_product([['Informer'], ['MSE', 'MAE']], names=['Model', 'Metrics'])

# Concatenate with ARIMA
df_all = pd.concat([df_informer, df_wide_arima.iloc[:, :2]], axis=1)

df_all

Model,Informer,Informer,ARIMA,ARIMA
Metrics,MSE,MAE,MSE,MAE
DE_load_actual_entsoe_transparency,0.120858,0.249314,0.471867,0.361086
DE_solar_generation_actual,0.151555,0.226218,0.188723,0.112712
DE_wind_generation_actual,0.644801,0.582968,0.651457,0.836964
DE_wind_offshore_generation_actual,1.185348,0.849587,0.893363,1.620248
DE_wind_onshore_generation_actual,0.625351,0.565995,0.647845,0.795603
GB_UKM_load_actual_entsoe_transparency,0.144596,0.282156,0.255775,0.118962
GB_UKM_solar_generation_actual,0.210975,0.270608,0.237781,0.138101
GB_UKM_wind_generation_actual,0.860881,0.708348,0.759619,0.928757
GB_UKM_wind_offshore_generation_actual,1.587935,0.993076,1.177409,2.06487
GB_UKM_wind_onshore_generation_actual,0.645331,0.611427,0.548707,0.538438


In [None]:
# Loop over multiindex dataframe to get the columns that start with country index
# and also loop over the metrics to average

top_5_countries = ['DE', 'GB', 'ES', 'FR', 'IT']
metrics = ['MAE', 'MSE', 'RMSE']

for country in top_5_countries:
    print(f"Country: {country}")
    country_columns = [(col, metric) for col, metric in metrics_df.columns if col.startswith(country)]
    for metric in metrics:
        metric_columns = [(col, metric) for col, m in country_columns if m == metric]
        if metric_columns:
            values_to_average = metrics_df[metric_columns].values.flatten()
            mean_value = values_to_average.mean()
            print(f"Mean {metric}: {mean_value}")
        else:
            print(f"No data for metric {metric} in country {country}")
    print()