### Load input data

In [None]:
import pandas as pd

# Both CSVs are read the same way: the first column becomes the index and
# pandas is asked to parse it as dates.
csv_options = {"index_col": 0, "parse_dates": True}
df_h = pd.read_csv("data/data_est_hourly.csv.bz2", **csv_options)
df_d = pd.read_csv("data/data_est_daily.csv.bz2", **csv_options)

# Derive the per-layer overhead columns in place on each frame:
#   message_overhead = message_size - payload_size
#   tcpip_overhead   = tcpip_size   - message_size
for df in (df_h, df_d):
    df["message_overhead"] = df["message_size"].sub(df["payload_size"])
    df["tcpip_overhead"] = df["tcpip_size"].sub(df["message_size"])

# Analysis

- [x] Function to plot absolute traffic
- [ ] Function to plot relative traffic (probably not needed)
- [x] Plot all message types

### Plotting function

In [None]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.ticker import EngFormatter


def plot_type(df, msg_type, how="absolute"):
    """Plot and save a stacked area chart of traffic for one message type.

    Rows of ``df`` matching ``msg_type`` are summed per index value and drawn
    as three stacked areas: payload, P2P protocol overhead and TCP/IP
    overhead. The figure is saved as ``breakdown-{msg_type}-{how}`` relative
    to the current working directory (no extension is given, so matplotlib
    picks its default output format), then displayed and closed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``payload_size``, ``message_overhead`` and
        ``tcpip_overhead``, plus ``msg_type`` unless ``msg_type == "all"``.
        The index is used as the x axis and is formatted as dates.
    msg_type : str
        Value of the ``msg_type`` column to select, or ``"all"`` to aggregate
        every row regardless of its type.
    how : str, default "absolute"
        Only used as a suffix of the output file name; the chart itself
        always shows absolute sizes.

    Returns
    -------
    None
    """
    size_columns = ["payload_size", "message_overhead", "tcpip_overhead"]

    df_msg_type = df if msg_type == "all" else df[df["msg_type"] == msg_type]

    # Aggregate only the columns that are plotted. Summing the whole frame
    # would also "sum" the string msg_type column (concatenating it per
    # timestamp) and could fail on other non-numeric columns.
    df_sizes = df_msg_type[size_columns]
    df_grouped = df_sizes.groupby(df_sizes.index).sum()

    fig, ax = plt.subplots()

    ax.stackplot(
        df_grouped.index,
        df_grouped["payload_size"].values,
        df_grouped["message_overhead"].values,
        df_grouped["tcpip_overhead"].values,
        labels=["payload", "P2P protocol overhead", "TCP/IP overhead"],
    )

    # str() keeps the title working for non-string values of msg_type
    # (e.g. NaN coming out of Series.unique()).
    fig.suptitle(f"{str(msg_type).upper()} messages")
    ax.set_title("Daily P2P traffic breakdown into payload and overhead")
    ax.legend(loc="upper left", title="")
    ax.yaxis.set_major_formatter(EngFormatter(unit="B"))
    # One labelled tick every three days, e.g. "Jan 24".
    ax.xaxis.set_major_locator(mdates.DayLocator(interval=3))
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %d"))

    # Save before showing: some backends tear the figure down in show().
    fig.savefig(
        f"breakdown-{msg_type}-{how}", dpi=300, bbox_inches="tight", facecolor="white"
    )
    plt.show()
    # This function is called once per message type; close the figure so
    # they do not accumulate in pyplot's figure registry.
    plt.close(fig)

In [None]:
# Distinct message types present in the daily data.
message_types = df_d["msg_type"].unique()

# Combined breakdown over every message type, from 2025-01-24 onwards.
# .loc makes it explicit that this is a label-based row slice on the index.
plot_type(df_d.loc["2025-01-24":], "all")

# One breakdown per message type over the full daily range.
for msg_type in message_types:
    plot_type(df_d, msg_type)