In [30]:
import pandas as pd

# Load the 2023 and 2024 data exports; both paths are relative to the
# notebook's working directory.
df_2023 = pd.read_csv('data/data_2023.csv')

df_2024 = pd.read_csv('data/data_2024.csv')

In [1]:
import plotly.graph_objects as go
import pandas as pd
import numpy as np

def generate_predictions(consumption_2024, consumption_2023):
    """Fill the missing 2024 months using the last known value plus 2023's monthly change.

    Known 2024 values are passed through untouched. Each missing month is
    forecast as ``previous value + (2023[i] - 2023[i-1])``, where "previous
    value" is the last known or already-forecast 2024 value. Forecasts
    therefore chain: a forecast month becomes the base for the next one.

    Args:
        consumption_2024: Sequence of monthly values for 2024. ``None`` or NaN
            marks a month that has not been reported.
        consumption_2023: Sequence of monthly values for 2023, aligned by
            index with ``consumption_2024``. ``None`` or NaN marks a gap. It
            may be shorter than ``consumption_2024``; months beyond its end
            get no seasonal adjustment.

    Returns:
        A new list with one entry per 2024 month. Months that precede the
        first known 2024 value stay ``None`` because there is nothing to
        extrapolate from.
    """
    def _is_missing(value):
        # NaN is the only value that compares unequal to itself; this catches
        # pandas/numpy empty cells without importing either library here.
        return value is None or value != value

    # Month-over-month change seen in 2023. An entry stays 0 when either of
    # the two months involved is missing, so gaps never produce an adjustment.
    seasonality = [0] * len(consumption_2023)
    for i in range(1, len(consumption_2023)):
        current, previous = consumption_2023[i], consumption_2023[i - 1]
        if not _is_missing(current) and not _is_missing(previous):
            seasonality[i] = current - previous

    predictions = []
    last_known_value = None

    for i, value in enumerate(consumption_2024):
        if not _is_missing(value):
            # Reported month: keep it and use it as the new forecasting base.
            predictions.append(value)
            last_known_value = value
        elif last_known_value is None:
            # Nothing reported yet in 2024, so there is no base to extend.
            predictions.append(None)
        else:
            # Guard the lookup so a shorter 2023 series cannot raise IndexError.
            seasonal_effect = seasonality[i] if i < len(seasonality) else 0
            last_known_value = last_known_value + seasonal_effect
            predictions.append(last_known_value)

    return predictions


# Full month names as they appear in the CSV column headers
# ("<Month> Value" / "<Month> Coverage").
_MONTH_NAMES = (
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
)


def _is_missing_cell(value):
    """Return True for None and NaN (NaN is the only value unequal to itself)."""
    return value is None or value != value


def _monthly_cells(row, suffix, default):
    """Read the twelve '<Month> <suffix>' cells of a row, using `default` for absent columns."""
    return [row.get(f'{month} {suffix}', default) for month in _MONTH_NAMES]


def _parse_consumption(raw):
    """Convert one consumption cell to float, or None when the cell is empty.

    Accepts strings with thousands separators ("1,234") as well as plain
    numbers, because pandas parses a column as numeric when none of its cells
    contain a separator.
    """
    if _is_missing_cell(raw):
        return None
    if isinstance(raw, str):
        cleaned = raw.replace(',', '').strip()
        return float(cleaned) if cleaned else None
    return float(raw)


def _coverage_or_full(raw):
    """Return a usable coverage percentage; a missing or non-positive cell counts as 100."""
    if _is_missing_cell(raw) or raw <= 0:
        return 100
    return raw


def _scale_to_full_coverage(values, coverage):
    """Extrapolate each reported value to 100% coverage; None and 100%-coverage values pass through."""
    return [
        value * 100 / share if value is not None and share != 100 else value
        for value, share in zip(values, coverage)
    ]


def _bar_label(value):
    """Format a bar label: rounded thousands with a 'K' suffix from 1000 up, rounded integer below."""
    if value is None:
        return ''
    if value >= 1000:
        return f'<b>{int(round(value / 1000))}K</b>'
    return f'<b>{int(round(value))}</b>'


def plot_country_heating_consumption_with_2023(country_index, df_2024, df_2023):
    """Build the 2023-vs-2024 heating consumption chart for one country and save it as PNG.

    The chart overlays, per month:
      * a spline of the 2023 values scaled to 100% coverage,
      * bars for the 2024 values as reported, labelled with their coverage,
      * bars for the 2024 values scaled to 100% coverage,
      * bars for the months without a 2024 value, forecast by
        ``generate_predictions``.
    Yearly totals and the year-over-year change are printed below the plot.

    Args:
        country_index: Positional row index used for BOTH frames.
        df_2024: DataFrame with a 'Country' column plus '<Month> Value' and
            optional '<Month> Coverage' columns for 2024.
        df_2023: DataFrame with the same monthly columns for 2023.

    Returns:
        The ``plotly.graph_objects.Figure`` that was built.

    Side effects:
        Writes ``heating_conusmption_2023_vs_2024_<country>.png`` to the
        current working directory.

    Notes:
        * Missing coverage cells (NaN) and non-positive coverage are treated
          as 100, matching the default used when the column is absent.
        * Numeric cells are accepted as consumption values, not only strings.
    """
    country_2024 = df_2024.iloc[country_index]
    country_name = country_2024['Country']

    # Month abbreviations used as the categorical x axis.
    months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    # --- 2024: reported values, coverage, and values scaled to 100% coverage ---
    consumption_2024 = [_parse_consumption(cell) for cell in _monthly_cells(country_2024, 'Value', None)]
    coverage_2024 = [_coverage_or_full(cell) for cell in _monthly_cells(country_2024, 'Coverage', 100)]
    consumption_2024_estimated = _scale_to_full_coverage(consumption_2024, coverage_2024)

    # --- 2023: same extraction ---
    # NOTE(review): the 2023 row is taken by position, which assumes both
    # frames list the countries in the same order - confirm, or match on 'Country'.
    country_2023 = df_2023.iloc[country_index]
    consumption_2023 = [_parse_consumption(cell) for cell in _monthly_cells(country_2023, 'Value', None)]
    coverage_2023 = [_coverage_or_full(cell) for cell in _monthly_cells(country_2023, 'Coverage', 100)]

    # The 2023 values are multiplied by 1000 first so both years share a unit
    # before the common division below (the original comments describe this
    # as MWh -> kWh; presumably the 2023 file is in MWh - verify).
    consumption_2023 = [val * 1000 if val is not None else None for val in consumption_2023]
    consumption_2023_estimated = _scale_to_full_coverage(consumption_2023, coverage_2023)

    # Common conversion of every series (kWh -> MWh per the original comments).
    consumption_2024 = [val / 1000 if val is not None else None for val in consumption_2024]
    consumption_2024_estimated = [val / 1000 if val is not None else None for val in consumption_2024_estimated]
    consumption_2023 = [val / 1000 if val is not None else None for val in consumption_2023]
    consumption_2023_estimated = [val / 1000 if val is not None else None for val in consumption_2023_estimated]

    # Forecast the months without a 2024 value.
    # NOTE(review): the seasonal deltas come from the *reported* 2023 series
    # while the 2024 base is coverage-adjusted - confirm this is intended.
    consumption_2024_predicted = generate_predictions(consumption_2024_estimated, consumption_2023)

    # Index of the first month without a 2024 value (12 when the year is complete).
    estimated_end_index = next(
        (i for i, val in enumerate(consumption_2024_estimated) if val is None),
        len(consumption_2024_estimated),
    )

    # Yearly totals: 2024 combines the adjusted values up to the first gap
    # with the forecast series (which also carries later known values) after it.
    total_consumption_2023 = sum(val for val in consumption_2023_estimated if val is not None)
    total_consumption_2024 = (
        sum(val for val in consumption_2024_estimated[:estimated_end_index] if val is not None)
        + sum(val for val in consumption_2024_predicted[estimated_end_index:] if val is not None)
    )

    # Year-over-year change in percent; 0 when there is no 2023 baseline.
    if total_consumption_2023 != 0:
        percentage_change = ((total_consumption_2024 - total_consumption_2023) / total_consumption_2023) * 100
    else:
        percentage_change = 0

    fig = go.Figure()

    # 2023 series as a smoothed teal line with circle markers.
    fig.add_trace(go.Scatter(
        legendrank=1,
        showlegend=True,
        x=months,
        y=consumption_2023_estimated,
        mode='lines+markers',
        name='2023 Reported',
        line=dict(color='#007770', width=2),
        marker=dict(symbol='circle', size=5, color='#007770'),
        line_shape='spline'
    ))

    # 100%-coverage estimate, drawn first so the reported bar overlays it.
    # Only months where the estimate differs from the reported value get a bar.
    fig.add_trace(go.Bar(
        legendrank=3,
        x=months,
        y=[
            estimated if reported is not None and reported != estimated else 0
            for reported, estimated in zip(consumption_2024, consumption_2024_estimated)
        ],
        name='2024 Estimated up to 100% Coverage',
        marker_color='#FEA77F',
        width=0.6,
        opacity=1,
    ))

    # Reported 2024 values on top, with the coverage percentage inside each bar.
    fig.add_trace(go.Bar(
        legendrank=2,
        x=months,
        y=[val if val is not None else 0 for val in consumption_2024],
        name='2024 Reported with Meter Coverage',
        marker_color='#FF824D',
        text=[
            f'{coverage_2024[i]}%' if consumption_2024[i] is not None else ''
            for i in range(len(consumption_2024))
        ],
        textposition='inside',
        textfont=dict(color='white', size=8, family='Figtree'),
        insidetextanchor='middle',
        textangle=0,  # keep the label horizontal
        width=0.6,
        opacity=1
    ))

    # Text-only trace placing the estimated value above each 2024 bar.
    fig.add_trace(go.Scatter(
        showlegend=False,
        x=months,
        y=[val if val is not None else 0 for val in consumption_2024_estimated],
        mode='text',
        text=[_bar_label(val) for val in consumption_2024_estimated],
        textposition='top center',
        textfont=dict(color='#44546A', size=10, family='Figtree', weight='bold'),
    ))

    # Overlay (instead of grouping) so reported and estimated bars share a slot.
    fig.update_layout(barmode='overlay')

    # Forecast bars, only for the months that have no reported 2024 value.
    forecast_months = [i for i in range(len(consumption_2024_predicted)) if consumption_2024[i] is None]
    fig.add_trace(go.Bar(
        legendrank=4,
        x=[months[i] for i in forecast_months],
        y=[consumption_2024_predicted[i] for i in forecast_months],
        name='2024 Forecast',
        marker_color='#F2EFEC',
        width=0.6,
        opacity=1,
        text=[_bar_label(consumption_2024_predicted[i]) for i in forecast_months],
        textposition='outside',
        textfont=dict(size=10, color='#44546A', family='Figtree', weight='bold'),
        insidetextanchor='middle'
    ))

    # Overall layout: centred title, white background, horizontal legend below the plot.
    fig.update_layout(
        title={
            'text': f'Heating Consumption for {country_name} (2023 vs 2024)',
            'x': 0.5,
            'y': 0.95,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(size=20, family='Figtree', color='#44546A')
        },
        yaxis_title_standoff=10,
        plot_bgcolor='white',
        font=dict(family='Figtree', size=14, color='#44546A'),
        legend=dict(orientation='h', yanchor='bottom', y=-0.175, xanchor='left', x=0.01, font=dict(size=8, color='#44546A')),
        width=700,
        height=600,
        margin=dict(l=50, r=50, t=100, b=150),  # bottom margin leaves room for legend and totals
        bargap=0.2
    )

    # Horizontal grid lines only; a thin baseline on the x axis.
    fig.update_yaxes(showline=False, showgrid=True, gridcolor='lightgrey', gridwidth=1, tickfont=dict(size=8, color='#44546A'))
    fig.update_xaxes(showline=True, linecolor='#44546A', linewidth=0.75, showgrid=False, title_font=dict(size=8, color='#44546A'), tickfont=dict(size=10, family='Figtree', color='#44546A'))

    # Vertical position (paper coordinates) of the totals line under the legend.
    y_position = -0.25

    # Only prepend '+' when the rounded number does not already carry a minus
    # sign; a hard-coded '+' rendered decreases as "+-x%".
    change_text = f'{round(percentage_change, 2)}'
    if not change_text.startswith('-'):
        change_text = '+' + change_text

    # Single annotation with totals and deltas; the budget figures are still placeholders.
    gap = "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;"
    annotation_text = (
        f"<b>Total 2023: {total_consumption_2023:,.0f} MWh</b>" + gap +
        f"<b>Total Estimated 2024: {total_consumption_2024:,.0f} MWh</b>" + gap +
        f"<b>LY &#x2206; <span style='color:#FF0000;'>{change_text}%</span></b>" + gap +
        f"<b>Budget 2024: xxx MWh</b>" + gap +
        f"<b>FC vs BU &#x2206; <span style='color:#00FF00;'>+xxx%</span></b>"
    )
    fig.add_annotation(
        text=annotation_text,
        xref="paper",
        yref="paper",
        x=0.01,  # left-aligned with the legend
        y=y_position,
        showarrow=False,
        font=dict(size=10, color="#44546A", family='Figtree'),
        align="left"
    )

    # Give the y axis 10% headroom above the tallest plotted value. An empty
    # series (a year without any usable value) must not raise, hence default=0.
    plotted_values = [
        float(val)
        for series in (consumption_2023_estimated, consumption_2024_estimated, consumption_2024_predicted)
        for val in series
        if val is not None
    ]
    max_y_value = max(plotted_values, default=0)
    if max_y_value > 0:
        fig.update_yaxes(range=[0, max_y_value * 1.1])

    # The file name (including its spelling) is kept as-is so existing outputs keep their name.
    fig.write_image(f"heating_conusmption_2023_vs_2024_{country_name}.png", scale=5)
    return fig

# Example: build and display the chart for the row at position 1
# (the second country in the 2024 file).
df_2023 = pd.read_csv('data/data_2023.csv')
df_2024 = pd.read_csv('data/data_2024.csv')

fig = plot_country_heating_consumption_with_2023(
    country_index=1,
    df_2024=df_2024,
    df_2023=df_2023,
)

fig.show()

In [63]:
# Export the figure under a fixed file name. This relies on `fig` still being
# defined in the kernel by the plotting cell.
fig.write_image(
    "heating_conusmption_2023_vs_2024_finland.png",
    scale=5,
)