In [6]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error
import plotly.graph_objects as go
import plotly.express as px
data1 = pd.read_csv('Residence1.csv')
data1 = data1.drop('HyperNetLSTM', axis=1)
data2 = pd.read_csv('Residence2.csv')
data2 = data2.drop('HyperNetLSTM', axis=1)


def smape(y_true, y_pred):
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2.0
    diff = np.abs(y_true - y_pred) / denominator
    diff[denominator == 0] = 0.0
    return 100 * np.mean(diff)

def calculate_metrics(data):
    y_actual = data['Actual']
    error_metrics = {}
    for model in data.columns[1:]:
        y_pred = data[model]
        mae = mean_absolute_error(y_actual, y_pred)
        rmse = np.sqrt(mean_squared_error(y_actual, y_pred))
        smape_value = smape(y_actual, y_pred)
        error_metrics[model] = {
            'MAE': mae,
            'RMSE': rmse,
            'SMAPE': smape_value
        }
    error_metrics_df = pd.DataFrame(error_metrics).transpose()
    metrix = error_metrics_df.sort_values(by='SMAPE')
    metrix.to_csv('./comparison.csv')
    return error_metrics_df.sort_values(by='SMAPE')
metrics_df1 = calculate_metrics(data1)
metrics_df1.to_csv('./comparison1.csv')
metrics_df2 = calculate_metrics(data2)
metrics_df2.to_csv('./comparison2.csv')

def plotly_top_performers_centered(data, metrics_df, title):
    top_3_models = metrics_df.index[:4]  # Top 4 models
    fig = go.Figure()

    # Adding actual values
    fig.add_trace(go.Scatter(x=data.index[:100], y=data['Actual'][:100], mode='lines', name='Actual'))

    # Adding predicted values of top 3 models
    for model in top_3_models:
        fig.add_trace(go.Scatter(x=data.index[:100], y=data[model][:100], mode='lines', name=model))

    # Update plot layout with centered title
    fig.update_layout(title=f'Actual vs Predicted - Top 4 Models by SMAPE ({title})',
                      title_x=0.5,
                      xaxis_title='Observations',
                      yaxis_title='Values',
                      legend_title='Models',
                      template='plotly_white',
                      height=400)
    fig.show()

def plotly_all_metrics_bar_chart_centered(metrics_df, title):
    # Melting the DataFrame to long format for Plotly
    long_df = metrics_df.reset_index().melt(id_vars='index', value_vars=['MAE', 'RMSE', 'SMAPE'])
    long_df.rename(columns={'index': 'Model', 'variable': 'Metric', 'value': 'Value'}, inplace=True)

    # Creating the bar chart with centered title
    fig = px.bar(long_df, x='Model', y='Value', color='Metric', barmode='group',
                 labels={'Value': 'Metric Value', 'Model': 'Models'})
    
    # Updating layout for centered title
    fig.update_layout(title=f'All Metrics Comparison - {title}', title_x=0.5, template='plotly_white', height=500)
    fig.show()
plotly_all_metrics_bar_chart_centered(metrics_df1, 'Residence 1')
plotly_all_metrics_bar_chart_centered(metrics_df2, 'Residence 2')
plotly_top_performers_centered(data1, metrics_df1, 'Residence 1')
plotly_top_performers_centered(data2, metrics_df2, 'Residence 2')

styled_df1 = metrics_df1.style.background_gradient(cmap='coolwarm', subset=['MAE', 'RMSE', 'SMAPE'])\
    .format("{:.2f}", subset=['MAE', 'RMSE', 'SMAPE'])\
    .set_caption("Error Metrics Sorted by SMAPE (Residence 1)")\
    .set_precision(2)

styled_df2 = metrics_df2.style.background_gradient(cmap='coolwarm', subset=['MAE', 'RMSE', 'SMAPE'])\
    .format("{:.2f}", subset=['MAE', 'RMSE', 'SMAPE'])\
    .set_caption("Error Metrics Sorted by SMAPE (Residence 2)")\
    .set_precision(2)

#styled_df1
styled_df2



this method is deprecated in favour of `Styler.format(precision=..)`


this method is deprecated in favour of `Styler.format(precision=..)`



Unnamed: 0,MAE,RMSE,SMAPE
HyperNetLSTMRBF,16.49,24.59,6.7
HyperNetLSTMPoly,17.29,25.8,7.02
AttentionLSTM,18.23,27.33,7.51
ARFFNN,19.19,26.65,7.82
GRU,19.67,28.04,8.14
Nbeats,20.6,29.87,8.43
LSTM,21.76,30.33,9.29
Transformer,29.06,36.59,11.52
MLP,29.24,36.89,11.61
RNN,29.05,39.5,12.0


In [7]:
def calculate_seasonal_metrics(data, season_start, season_end):
    season_data = data.iloc[season_start:season_end]
    y_actual = season_data['Actual']
    error_metrics = {}
    for model in data.columns[1:]:
        y_pred = season_data[model]
        mae = mean_absolute_error(y_actual, y_pred)
        rmse = np.sqrt(mean_squared_error(y_actual, y_pred))
        smape_value = smape(y_actual, y_pred)
        error_metrics[model] = {
            'MAE': mae,
            'RMSE': rmse,
            'SMAPE': smape_value
        }
    return pd.DataFrame(error_metrics).transpose().sort_values(by='SMAPE')

# Function to plot top performers for each season
def plotly_top_performers_season(data, metrics_df, title, season):
    top_3_models = metrics_df.index[:4]
    fig = go.Figure()

    # Adding actual values
    fig.add_trace(go.Scatter(x=data.index[:100], y=data['Actual'][:100], mode='lines', name='Actual'))

    # Adding predicted values of top 3 models
    for model in top_3_models:
        fig.add_trace(go.Scatter(x=data.index[:100], y=data[model][:100], mode='lines', name=model))

    # Update plot layout with centered title and color scheme
    fig.update_layout(title=f'{season} - Top 4 Models by SMAPE ({title})',
                      title_x=0.5,
                      xaxis_title='Observations',
                      yaxis_title='Values',
                      legend_title='Models',
                      template='plotly_white',
                      height=400)
    fig.show()

# Define seasons and their corresponding start and end indices
seasons = {
    'Fall': (0, 62680),       # August to November
    'Winter': (62680, 125360),# December to March
    'Spring': (125360, 157680),# April to May
    'Summer': (157680, 188041) # June to July
}

for season, (start, end) in seasons.items():
    season_metrics_df1 = calculate_seasonal_metrics(data1, start, end)
    season_metrics_df2 = calculate_seasonal_metrics(data2, start, end)

    plotly_top_performers_season(data1[start:end], season_metrics_df1, 'Residence 1', season)
    plotly_top_performers_season(data2[start:end], season_metrics_df2, 'Residence 2', season)

    # Styling tables for each season
    styled_df1 = season_metrics_df1.style.background_gradient(cmap='coolwarm', subset=['MAE', 'RMSE', 'SMAPE'])\
        .format("{:.2f}", subset=['MAE', 'RMSE', 'SMAPE'])\
        .set_caption(f"Error Metrics Sorted by SMAPE (Residence 1 - {season})")\
        .set_precision(2)

    styled_df2 = season_metrics_df2.style.background_gradient(cmap='coolwarm', subset=['MAE', 'RMSE', 'SMAPE'])\
        .format("{:.2f}", subset=['MAE', 'RMSE', 'SMAPE'])\
        .set_caption(f"Error Metrics Sorted by SMAPE (Residence 2 - {season})")\
        .set_precision(2)

    # Display styled tables
    #styled_df1
    #styled_df2



this method is deprecated in favour of `Styler.format(precision=..)`


this method is deprecated in favour of `Styler.format(precision=..)`




this method is deprecated in favour of `Styler.format(precision=..)`


this method is deprecated in favour of `Styler.format(precision=..)`




this method is deprecated in favour of `Styler.format(precision=..)`


this method is deprecated in favour of `Styler.format(precision=..)`




this method is deprecated in favour of `Styler.format(precision=..)`


this method is deprecated in favour of `Styler.format(precision=..)`



In [8]:
def calculate_seasonal_metrics(data, season, dataset_name):
    y_actual = data['Actual']
    error_metrics = {}
    for model in data.columns[1:]:
        y_pred = data[model]
        mae = mean_absolute_error(y_actual, y_pred)
        rmse = np.sqrt(mean_squared_error(y_actual, y_pred))
        smape_value = smape(y_actual, y_pred)
        error_metrics[model] = {
            'Dataset': dataset_name,
            'Season': season,
            'MAE': mae,
            'RMSE': rmse,
            'SMAPE': smape_value
        }
    return pd.DataFrame(error_metrics).transpose()
seasons = {
    'Fall': (0, 62680),       # August to November
    'Winter': (62680, 125360),# December to March
    'Spring': (125360, 157680),# April to May
    'Summer': (157680, 188041) # June to July
}

combined_metrics = []

for season, (start, end) in seasons.items():
    # Calculating metrics for each model in the season for Residence 1
    for model in data1.columns[1:]:
        season_data = data1.iloc[start:end]
        y_actual = season_data['Actual']
        y_pred = season_data[model]
        mae = mean_absolute_error(y_actual, y_pred)
        rmse = np.sqrt(mean_squared_error(y_actual, y_pred))
        smape_value = smape(y_actual, y_pred)
        combined_metrics.append({
            'Dataset': 'Residence 1',
            'Season': season,
            'Model': model,
            'MAE': mae,
            'RMSE': rmse,
            'SMAPE': smape_value
        })

    # Calculating metrics for each model in the season for Residence 2
    for model in data2.columns[1:]:
        season_data = data2.iloc[start:end]
        y_actual = season_data['Actual']
        y_pred = season_data[model]
        mae = mean_absolute_error(y_actual, y_pred)
        rmse = np.sqrt(mean_squared_error(y_actual, y_pred))
        smape_value = smape(y_actual, y_pred)
        combined_metrics.append({
            'Dataset': 'Residence 2',
            'Season': season,
            'Model': model,
            'MAE': mae,
            'RMSE': rmse,
            'SMAPE': smape_value
        })

combined_metrics_df = pd.DataFrame(combined_metrics)

styled_combined_metrics = combined_metrics_df.style.background_gradient(cmap='coolwarm', subset=['MAE', 'RMSE', 'SMAPE'])\
    .format("{:.2f}", subset=['MAE', 'RMSE', 'SMAPE'])\
    .set_caption("Error Metrics Sorted by SMAPE for All Seasons")\
    .set_precision(2)

combined_metrics_df.to_csv('seasonal_metrics_with_models.csv')

#styled_combined_metrics




this method is deprecated in favour of `Styler.format(precision=..)`



In [9]:
def process_seasonal_metrics_without_index(data, dataset_name):
    seasonal_tables = {}
    
    for season, (start, end) in seasons.items():
        season_data = data.iloc[start:end]
        y_actual = season_data['Actual']
        season_metrics = []

        for model in data.columns[1:]:
            y_pred = season_data[model]
            mae = mean_absolute_error(y_actual, y_pred)
            rmse = np.sqrt(mean_squared_error(y_actual, y_pred))
            smape_value = smape(y_actual, y_pred)
            season_metrics.append({
                'Model': model,
                'MAE': mae,
                'RMSE': rmse,
                'SMAPE': smape_value
            })
        season_metrics_df = pd.DataFrame(season_metrics).sort_values(by='SMAPE')
        styled_season_metrics = season_metrics_df.style.hide_index()\
            .background_gradient(cmap='coolwarm', subset=['MAE', 'RMSE', 'SMAPE'])\
            .format("{:.2f}", subset=['MAE', 'RMSE', 'SMAPE'])\
            .set_caption(f"Error Metrics Sorted by SMAPE ({dataset_name} - {season})")\
            .set_precision(2)

        seasonal_tables[season] = styled_season_metrics

    return seasonal_tables

seasonal_tables_residence1_no_index = process_seasonal_metrics_without_index(data1, "Residence 1")
seasonal_tables_residence2_no_index = process_seasonal_metrics_without_index(data2, "Residence 2")

for season, table in seasonal_tables_residence1_no_index.items():
    print(f"Residence 1 - {season} Season")
    display(table)
    
for season, table in seasonal_tables_residence2_no_index.items():
    print(f"Residence 2 - {season} Season")
    display(table)

Residence 1 - Fall Season



this method is deprecated in favour of `Styler.format(precision=..)`


this method is deprecated in favour of `Styler.format(precision=..)`


this method is deprecated in favour of `Styler.format(precision=..)`


this method is deprecated in favour of `Styler.format(precision=..)`


this method is deprecated in favour of `Styler.format(precision=..)`


this method is deprecated in favour of `Styler.format(precision=..)`


this method is deprecated in favour of `Styler.format(precision=..)`


this method is deprecated in favour of `Styler.format(precision=..)`



Model,MAE,RMSE,SMAPE
Transformer,22.0,31.56,8.59
AttentionLSTM,24.04,33.74,9.07
GRU,25.48,33.28,9.71
TempConv,24.5,31.8,9.93
HyperNetLSTMRBF,28.28,37.3,10.68
LSTM,28.95,38.15,11.01
RNN,32.95,44.94,11.89
HyperNetLSTMPoly,33.76,43.75,12.7
ARFFNN,34.49,43.91,13.05
Nbeats,35.17,45.56,13.41


Residence 1 - Winter Season


Model,MAE,RMSE,SMAPE
HyperNetLSTMPoly,18.52,24.02,6.88
MLP,18.68,23.35,7.0
HyperNetLSTMRBF,18.92,24.41,7.07
LSTM,19.27,24.81,7.25
ARFFNN,19.43,24.77,7.27
RNN,20.31,26.45,7.61
Nbeats,23.12,30.03,8.79
AttentionLSTM,25.52,32.02,9.65
TempConv,26.53,33.42,10.09
Transformer,27.73,34.4,10.4


Residence 1 - Spring Season


Model,MAE,RMSE,SMAPE
HyperNetLSTMRBF,21.02,29.08,9.05
LSTM,20.76,28.78,9.06
ARFFNN,21.8,29.63,9.49
HyperNetLSTMPoly,22.0,30.22,9.5
AttentionLSTM,22.14,30.13,9.51
RNN,23.34,34.53,9.85
Transformer,23.06,32.4,10.02
Nbeats,24.24,34.34,10.82
TempConv,24.68,33.53,10.93
MLP,28.27,38.89,12.01


Residence 1 - Summer Season


Model,MAE,RMSE,SMAPE
HyperNetLSTMRBF,11.55,17.76,7.93
HyperNetLSTMPoly,12.13,17.88,8.04
Transformer,11.89,19.2,8.42
AttentionLSTM,12.29,18.21,8.43
GRU,12.34,18.14,8.65
Nbeats,12.61,19.51,8.74
LSTM,13.45,19.04,9.11
ARFFNN,14.59,19.87,9.71
TempConv,13.74,19.88,9.95
RNN,17.96,24.98,11.98


Residence 2 - Fall Season


Model,MAE,RMSE,SMAPE
HyperNetLSTMRBF,18.17,26.27,6.25
LSTM,19.04,27.0,6.6
HyperNetLSTMPoly,19.6,28.23,6.77
GRU,19.6,26.88,6.87
AttentionLSTM,20.09,28.57,6.92
ARFFNN,20.79,27.38,7.23
Nbeats,23.66,32.97,8.22
RNN,29.19,40.23,10.03
TempConv,35.42,48.81,12.09
MLP,35.94,43.98,12.26


Residence 2 - Winter Season


Model,MAE,RMSE,SMAPE
HyperNetLSTMRBF,11.46,15.35,5.11
HyperNetLSTMPoly,12.16,16.83,5.46
ARFFNN,12.36,15.58,5.6
AttentionLSTM,12.1,15.47,5.62
GRU,15.58,21.66,7.05
Nbeats,15.6,20.5,7.05
LSTM,19.09,23.68,8.93
Transformer,20.67,25.85,9.17
MLP,20.77,26.01,9.22
RNN,26.06,34.18,11.73


Residence 2 - Spring Season


Model,MAE,RMSE,SMAPE
HyperNetLSTMRBF,14.91,20.87,6.4
HyperNetLSTMPoly,15.07,21.27,6.51
AttentionLSTM,15.48,21.73,6.8
ARFFNN,16.63,22.35,7.18
Nbeats,16.88,24.07,7.39
GRU,17.48,23.76,7.6
LSTM,21.67,27.78,9.77
Transformer,23.15,29.92,9.98
MLP,23.29,30.1,10.04
RNN,26.2,34.83,11.7


Residence 2 - Summer Season


Model,MAE,RMSE,SMAPE
HyperNetLSTMRBF,25.1,37.02,11.22
HyperNetLSTMPoly,25.45,37.57,11.32
Nbeats,28.55,42.4,12.83
AttentionLSTM,29.95,44.1,13.42
GRU,30.58,42.53,13.58
ARFFNN,32.68,42.63,14.29
TempConv,34.26,48.06,14.98
LSTM,33.0,47.03,15.1
Transformer,38.4,44.53,16.47
MLP,39.23,45.54,16.85


In [5]:
import pandas as pd
import plotly.express as px
from sklearn.metrics import mean_absolute_error

# Load your data
data1 = pd.read_csv('Residence1.csv')
data2 = pd.read_csv('Residence2.csv')

# Define the seasons and their corresponding indices
seasons = {
    'Fall': (0, 62680),       # August to November
    'Winter': (62680, 125360),# December to March
    'Spring': (125360, 157680),# April to May
    'Summer': (157680, 188041) # June to July
}

def create_mae_bar_chart(data, dataset_name):
    # Preparing data for the bar chart
    bar_chart_data = []
    for season, (start, end) in seasons.items():
        season_data = data.iloc[start:end]
        y_actual = season_data['Actual']
        for model in data.columns[1:]:
            y_pred = season_data[model]
            mae = mean_absolute_error(y_actual, y_pred)
            bar_chart_data.append({'Season': season, 'Model': model, 'MAE': mae})

    df_bar_chart = pd.DataFrame(bar_chart_data)

    # Creating the bar chart
    fig = px.bar(df_bar_chart, x='Model', y='MAE', color='Season', barmode='group',
                 title=f'MAE Bar Chart Across Seasons for {dataset_name}',
                 category_orders={"Season": ["Fall", "Winter", "Spring", "Summer"]},
                 height=600)
    return fig

# Create and display the bar chart for each dataset
bar_chart_residence1 = create_mae_bar_chart(data1, "Residence 1")
bar_chart_residence1.show()

bar_chart_residence2 = create_mae_bar_chart(data2, "Residence 2")
bar_chart_residence2.show()


In [6]:
# Adjusting the script to create an interactive bar chart for SMAPE across seasons for each model

def create_smape_interactive_bar_chart_centered_caption(data, dataset_name):
    # Function to calculate SMAPE
    def smape(y_true, y_pred):
        denominator = (np.abs(y_true) + np.abs(y_pred)) / 2.0
        diff = np.abs(y_true - y_pred) / denominator
        diff[denominator == 0] = 0.0
        return 100 * np.mean(diff)

    # Preparing data for the bar chart
    bar_chart_data = []
    for season, (start, end) in seasons.items():
        season_data = data.iloc[start:end]
        y_actual = season_data['Actual']
        for model in data.columns[1:]:
            y_pred = season_data[model]
            smape_value = smape(y_actual, y_pred)
            bar_chart_data.append({'Season': season, 'Model': model, 'SMAPE': smape_value})

    df_bar_chart = pd.DataFrame(bar_chart_data)

    # Creating the interactive bar chart with centered caption
    fig = px.bar(df_bar_chart, x='Model', y='SMAPE', color='Season', barmode='group',
                 title=f'SMAPE Across Seasons for {dataset_name}',
                 labels={'SMAPE': 'Symmetric Mean Absolute Percentage Error', 'Model': 'Model', 'Season': 'Season'},
                 height=600)
    fig.update_layout(title={'text': f'SMAPE Across Seasons for {dataset_name}', 'x': 0.5})
    return fig

# Create and display the interactive bar chart with centered caption for Residence 1
interactive_bar_chart_residence1_smape_centered = create_smape_interactive_bar_chart_centered_caption(data1, "Residence 1")
interactive_bar_chart_residence1_smape_centered.show()

# Create and display the interactive bar chart with centered caption for Residence 2
interactive_bar_chart_residence2_smape_centered = create_smape_interactive_bar_chart_centered_caption(data2, "Residence 2")
interactive_bar_chart_residence2_smape_centered.show()

