### Load input data

In [None]:
import pandas as pd

# Load the hourly and daily tables. In both files the first column becomes the
# index (index_col=0) and pandas is asked to parse that index as dates
# (parse_dates=True). The bz2 compression is inferred by pandas from the
# ".bz2" file extension.
df_h = pd.read_csv("data/data_hourly.csv.bz2", index_col=0, parse_dates=True)
df_d = pd.read_csv("data/data_daily.csv.bz2", index_col=0, parse_dates=True)

## Plot daily contribution

In [None]:
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
from matplotlib.ticker import EngFormatter

# Build an (index value x peer_conn_type) table of summed net_size from the
# daily data and draw it as a stacked area chart.
df = df_d.copy()

# One row per distinct index value of df, one column per peer_conn_type; each
# cell is the sum of net_size for that combination. Combinations that have no
# rows come out as NaN.
df_pivot = df.pivot_table(
    index=df.index, columns="peer_conn_type", values="net_size", aggfunc="sum"
)

# Sort first so that "third distinct index value" does not depend on the
# ordering pivot_table happens to return, then drop every row before it.
# NOTE(review): presumably the first two days are skipped because they are
# incomplete -- confirm against how the data was collected.
df_pivot = df_pivot.sort_index()
start_date = df_pivot.index.unique()[2]
df_pivot = df_pivot.loc[start_date:]

# Missing combinations become 0 so they contribute nothing to the stack.
df_pivot = df_pivot.fillna(0)

x = df_pivot.index
y = df_pivot.T.values  # transposed: one row of values per connection type
labels = df_pivot.columns

fig, ax = plt.subplots(figsize=(12, 8))
ax.stackplot(x, y, labels=labels)

# y ticks use engineering (SI-prefix) notation with a "B" unit suffix.
formatter = EngFormatter(unit="B")
ax.yaxis.set_major_formatter(formatter)

# One x tick every second day, labelled like "Jan 05".
ax.xaxis.set_major_locator(mdates.DayLocator(interval=2))
ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %d"))

ax.set_title("Stacked Area Chart of net_size by peer_conn_type")
ax.legend(ncol=4, title="")

# Rotate x-axis labels for readability
fig.autofmt_xdate()
plt.tight_layout()
plt.show()

## Plot daily contribution per single connection, by connection type

In [None]:
# Number of connections assumed for each connection type. Each type's total in
# df_pivot is divided by this count to estimate the traffic of one connection.
# NOTE(review): the inbound count is written as 115 - (8 + 2), i.e. a total of
# 115 minus the outbound-full-relay and block-relay-only slots; the feeler
# slot is not subtracted. Confirm the total and whether feeler should be
# excluded as well.
DEFAULT_NUM_CONNECTIONS = {
    "block-relay-only": 2,
    "feeler": 1,
    "outbound-full-relay": 8,
    "inbound": 115 - (8 + 2),
}

# Per-connection traffic: one column per connection type listed above, built
# in a single step instead of inserting columns into an empty frame one by one.
# A connection type missing from df_pivot raises KeyError here.
df_per_conn = pd.DataFrame(
    {
        col: df_pivot[col] / num_conns
        for col, num_conns in DEFAULT_NUM_CONNECTIONS.items()
    }
)

fig, ax = plt.subplots(figsize=(12, 8))

# One line per connection type, drawn in the order of the mapping above.
for col in df_per_conn.columns:
    ax.plot(df_per_conn.index, df_per_conn[col], label=col)

ax.set_title("TCP/IP traffic contribution for a single connection by connection type")
ax.legend(ncol=4, title="")

# y ticks use engineering (SI-prefix) notation with a "B" unit suffix.
formatter = EngFormatter(unit="B")
ax.yaxis.set_major_formatter(formatter)

# One x tick every second day, labelled like "Jan 05".
ax.xaxis.set_major_locator(mdates.DayLocator(interval=2))
ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %d"))

# Rotate x-axis labels for readability
fig.autofmt_xdate()
plt.tight_layout()
plt.show()