# Interactive MeteoHist graphs

In [None]:
import importlib
import os
import sys

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.express.colors import sample_colorscale
import datetime as dt
from calendar import isleap

In [None]:
# Put the parent directory on the import path so that `utils` can be imported
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

import utils

# Get data

In [None]:
# Reload utils so that edits made to the module since it was imported take effect
importlib.reload(utils)

# Metric configuration:
#   "name" is used in the figure title,
#   "data" is passed to utils.get_data as the metric to fetch,
#   "unit" is appended to hover labels and selects the unit system below.
metric = {"name": "temperature_max", "data": "temperature_2m_max", "unit": "°C"}
# "°F" selects imperial units, any other unit selects metric
units = "imperial" if metric["unit"] == "°F" else "metric"
# Year that is compared against the reference period
year = 2023
# (first year, last year) of the reference period
# NOTE(review): presumably inclusive on both ends — confirm in utils
reference_period = (1961, 1990)

# Coordinates of the location to analyse. Only the uncommented assignment is
# active; the other locations are kept as ready-to-use alternatives.
# Berlin lat, lon = 52.5170365, 13.3888599
# Rome lat, lon = 41.8933203, 12.4829321
# Caracas lat, lon = 10.5060934, -66.9146008
# Madrid
lat, lon = 40.417932, -3.690659
# Addis Ababa, Ethiopia lat, lon = 9.0358451, 38.7524066

# Look up a display name for the coordinates (used in the figure title)
location_name = utils.get_location((lat, lon))

# Fetch the data for the chosen location, metric and unit system
df = utils.get_data(
    lat,
    lon,
    year,
    reference_period=reference_period,
    metric=metric["data"],
    units=units,
)
# NOTE: if re-enabled, `{metric}` interpolates the whole dict into the file
# name and the name is hard-coded to Berlin; metric['data'] is probably meant.
# df.to_csv(f"{metric}-berlin-1941-2023.csv")

# Bundle the settings and build the MeteoHist object used by the cells below
settings = {"lat": lat, "lon": lon, "location_name": location_name, "metric": metric}
plot = utils.MeteoHist(df, year, reference_period=reference_period, settings=settings)

In [None]:
# Transform the raw data with MeteoHist.transform_df.
# NOTE(review): the later cells read the columns "dayofyear", "mean", "p05",
# "p95", "min", "max" and f"{year}_diff" from the result — confirm against
# utils.MeteoHist.transform_df.
df_t = plot.transform_df(df, year, reference_period)
# Optional caching of the transformed data (disabled)
# df_t.to_csv(f"tmp/{metric['data']}-berlin-1941-2023_transformed.csv")
# df_t = pd.read_csv("tmp/temperature_2m_mean-berlin-1941-2023_transformed.csv", index_col=0)

# Display the transformed dataframe
df_t

In [None]:
# Show summary statistics of the transformed dataframe
df_t.describe()

In [None]:
def normalize_diff(series, fill_na: bool = True):
    """
    Normalize differences to the range [0, 1] for use with a colorscale.

    Negative values are scaled linearly to [0, 0.5] and positive values
    to [0.5, 1]; each group is scaled independently by its own minimum
    and maximum.

    Parameters
    ----------
    series : array-like
        Differences to normalize. Values are converted to float, so
        integer input is not truncated.
    fill_na : bool, optional
        Replace NaNs with 0 before normalizing, by default True.
        If False, NaNs are passed through unchanged.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as the input. Values that are
        exactly 0 (including filled NaNs) are left at 0.

    Notes
    -----
    A group (negative or positive) that is empty is skipped. A group
    whose values are all identical has no range to scale by and is
    placed at the outer end of its half: 0.0 for negative values and
    1.0 for positive values.
    """
    # Force a float dtype: assigning fractions into an integer array
    # would silently truncate them to 0 or 1.
    values = np.array(series, dtype=float)

    # Fill NaNs with 0
    if fill_na:
        values = np.nan_to_num(values)

    series_norm = values.copy()

    # (mask, lower bound of the target half, value used for a constant group)
    halves = (
        (values < 0, 0.0, 0.0),
        (values > 0, 0.5, 1.0),
    )

    for mask, lower_bound, constant_value in halves:
        # Nothing to scale, and min()/max() would raise on an empty selection
        if not mask.any():
            continue

        selected = values[mask]
        min_value = selected.min()
        span = selected.max() - min_value

        if span == 0:
            # All values identical: avoid 0/0 and pin to the outer end
            series_norm[mask] = constant_value
        else:
            series_norm[mask] = (selected - min_value) / span * 0.5 + lower_bound

    return series_norm

In [None]:
def dayofyear_to_date(
    year: int, dayofyear: int, adj_leap: bool = False
) -> dt.datetime:
    """
    Return the calendar date that corresponds to a day of the year.

    Parameters
    ----------
    year : int
        Year the day belongs to.
    dayofyear : int
        Day of the year, where 1 is January 1st.
    adj_leap : bool, optional
        Set to True if leap years were reduced to 365 days, so that
        days after Feb 28 in a leap year are moved one day later,
        by default False

    Returns
    -------
    datetime.datetime
        Midnight of the resulting date.
    """
    # Days in January plus a non-leap February
    end_of_february = 31 + 28

    # Days after Feb 28 sit one calendar day later when adjusting a leap year
    if adj_leap and isleap(year) and dayofyear > end_of_february:
        shift = 1
    else:
        shift = 0

    first_of_january = dt.datetime(year, 1, 1)
    return first_of_january + dt.timedelta(days=dayofyear + shift - 1)

In [None]:
# Scale this year's differences to [0, 1] so they can drive the bar colors
df_t[f"{year}_diff_norm"] = normalize_diff(df_t[f"{year}_diff"])

# Turn each day of the year into a date in `year`, adjusting for leap years
df_t["date"] = df_t["dayofyear"].map(
    lambda doy: dayofyear_to_date(year, doy, adj_leap=True)
)

df_t

## Create an interactive graph

In [None]:
# Toggle for the optional "min" and "max" traces in the figure built below
show_min_max = False

In [None]:
# Build the interactive figure: reference mean, P05-P95 band and one bar per
# day showing this year's difference to the reference mean.

# Parts of the hover labels that are shared by several traces
hover_unit = plot.settings["metric"]["unit"]
ref_label = f"{reference_period[0]}-{reference_period[1]}"

fig = go.Figure()

# Traces are added in their final order and fig.data is NOT reversed
# afterwards. Reversing placed the P05 line directly before the "tonexty"
# fill trace, so the band was filled between two identical curves and
# vanished. It also could not lift the bars above the lines: plotly draws bar
# traces in a layer beneath scatter traces regardless of their position in
# fig.data. The band is therefore made translucent instead, so the bars
# underneath stay visible.

if show_min_max:
    fig.add_traces(
        [
            # min trace
            go.Scatter(
                x=df_t["date"],
                y=df_t["min"],
                name="min",
                line=dict(color="#ccc", width=0.5),
                showlegend=False,
                hovertemplate=(
                    "%{y:.1f}"
                    f"{hover_unit}"
                    f"<extra><b>Minimum {ref_label}</b></extra>"
                ),
            ),
            # max trace
            go.Scatter(
                x=df_t["date"],
                y=df_t["max"],
                name="max",
                line=dict(color="#ccc", width=0.5),
                showlegend=False,
                hovertemplate=(
                    "%{y:.1f}"
                    f"{hover_unit}"
                    f"<extra><b>Maximum {ref_label}</b></extra>"
                ),
            ),
        ]
    )

fig.add_traces(
    [
        # p95 trace
        go.Scatter(
            x=df_t["date"],
            y=df_t["p95"],
            name="P95",
            line=dict(color="#000", width=1, dash="dot"),
            showlegend=False,
            hovertemplate=(
                "%{y:.1f}"
                f"{hover_unit}"
                f"<extra><b>95th percentile {ref_label}</b></extra>"
            ),
        ),
        # Fill area between p05 and p95. "tonexty" fills towards the trace
        # added immediately before this one (the P95 line), so these two
        # traces must stay adjacent and in this order.
        go.Scatter(
            x=df_t["date"],
            y=df_t["p05"],
            fill="tonexty",
            # Translucent gray: close to #f8f8f8 on the white background,
            # but does not hide the bars drawn underneath
            fillcolor="rgba(0, 0, 0, 0.03)",
            # Make line transparent
            line=dict(color="rgba(0,0,0,0)"),
            showlegend=False,
            # Remove hoverinfo
            hoverinfo="skip",
        ),
        # p05 trace
        go.Scatter(
            x=df_t["date"],
            y=df_t["p05"],
            name="P05",
            line=dict(color="#000", width=1, dash="dot"),
            showlegend=False,
            hovertemplate=(
                "%{y:.1f}"
                f"{hover_unit}"
                f"<extra><b>5th percentile {ref_label}</b></extra>"
            ),
        ),
    ]
)

fig.add_traces(
    [
        # Each day's difference to reference mean, drawn from the mean upwards
        # or downwards and colored by the normalized difference
        go.Bar(
            x=df_t["date"],
            y=df_t[f"{year}_diff"],
            base=df_t["mean"],
            name=f"{year} value",
            marker=dict(
                color=df_t[f"{year}_diff_norm"],
                colorscale="RdYlBu_r",
                line=dict(width=0),
            ),
            showlegend=False,
            hovertemplate=(
                "%{y:.1f}"
                f"{hover_unit}<extra></extra>"
            ),
        ),
        # Mean trace, added last so that it is drawn above the other lines
        go.Scatter(
            x=df_t["date"],
            y=df_t["mean"],
            name="Mean",
            line=dict(color="#000", width=2.5),
            showlegend=False,
            hovertemplate=(
                "%{y:.1f}"
                f"{hover_unit}"
                f"<extra><b>Mean {ref_label}</b></extra>"
            ),
        ),
    ]
)

# Title, white template, fonts and figure size
fig.update_layout(
    title=dict(
        text=(
            f"<b>{metric['name']} in {location_name} {year}</b><br />"
            f"<sup>Compared to average of {metric['name']} ({reference_period[0]}-{reference_period[1]})</sup>"
        ),
        font=dict(
            family="Lato",
            size=32,
            color="#1f1f1f",
        ),
        x=1,
    ),
    template="plotly_white",
    hovermode="x",
    bargap=0,
    width=1000,
    height=600,
    xaxis=dict(
        hoverformat="%e %B",
    ),
    font=dict(
        family="Lato",
        size=12,
        color="#1f1f1f",
    ),
)

fig.update_xaxes(
    dtick="M1",  # Tick every month
    tickformat="%b",  # Month name
    ticklabelmode="period",  # Center tick labels
)

fig.show()

In [None]:
# Reload utils to pick up edits to the module, then rebuild the MeteoHist
# object so that it is created from the reloaded class
importlib.reload(utils)
plot = utils.MeteoHist(df, year, reference_period=reference_period, settings=settings)
# The trailing semicolon keeps the notebook from echoing the return value
plot.create_plot();

In [None]:
# Cut two independent copies out of the raw data: an early window
# (1941-1970) and a recent window (1991-2020), both bounds inclusive
df_ref = df[df["date"].dt.year.between(1941, 1970)].copy()
df_current = df[df["date"].dt.year.between(1991, 2020)].copy()

In [None]:
# Both histograms use the same bin settings and bin group
shared_bins = dict(start=0, end=40, size=0.5)

fig = go.Figure(
    data=[
        go.Histogram(
            x=df_current["value"],
            name="1991-2020",
            bingroup=1,
            xbins=shared_bins,
        ),
        go.Histogram(
            x=df_ref["value"],
            name="1941-1970",
            bingroup=1,
            xbins=shared_bins,
        ),
    ]
)

# Semi-transparent bars so that both overlaid histograms remain visible
fig.update_traces(opacity=0.75)

# Draw the histograms on top of each other instead of side by side
fig.update_layout(
    barmode="overlay",
    template="plotly_white",
    hovermode="x",
)
fig.show()