In [None]:
!pip install matplotlib 
!pip install pandas
!pip install plotly
!pip install nbformat


In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Constants: amount of data moved by one operation of each type, in MB
# (each numerator is a size in KB, divided by 1024).
READ_SIZE_MB = 64/1024
INSERT_SIZE_MB = 1/1024
INSERT_BATCH_SIZE_MB = 100/1024

# Load + preprocess main log
df = pd.read_csv('logs/client_stats.log')
# Time since the earliest sample; presumably `timestamp` is in milliseconds,
# hence the division by 1000 — confirm against the log producer.
df['relative_time_s'] = (df['timestamp'] - df['timestamp'].min()) / 1000
# Sort so that diff() below measures the gap between consecutive samples of
# the same (client, operation) stream.
df = df.sort_values(['client_id','op_type','timestamp'])
df['time_diff_s'] = df.groupby(['client_id','op_type'])['relative_time_s'].diff()
# Two samples of one stream sharing a timestamp give a zero interval, and
# dividing by it yields +/-inf, which dropna() does NOT remove. Mask zero
# intervals to NaN so those rows are dropped along with the first sample of
# each stream (whose diff is NaN).
df['throughput'] = df['count'] / df['time_diff_s'].replace(0, np.nan)
# Convert ops/s to MB/s using the per-operation payload size. Operation types
# without a known size (e.g. QUEUE) map to NaN and are excluded from df_th.
op_size_mb = {
    'READ': READ_SIZE_MB,
    'INSERT': INSERT_SIZE_MB,
    'INSERT_BATCH': INSERT_BATCH_SIZE_MB,
}
df['throughput_mb_s'] = df['throughput'] * df['op_type'].map(op_size_mb)
# Rows that carry a usable throughput value.
df_th = df.dropna(subset=['throughput_mb_s'])

# Per-operation views.
# Throughput views are taken from df_th (rows with a valid throughput);
# latency and queue views are taken from the full frame df.
df_read = df_th.loc[df_th['op_type'] == 'READ'].copy()  # copy: columns are added below
df_insert = df_th.loc[df_th['op_type'] == 'INSERT']
df_insert_batch = df_th.loc[df_th['op_type'] == 'INSERT_BATCH']
df_queue = df.loc[df['op_type'] == 'QUEUE']
df_read_latency = df.loc[df['op_type'] == 'READ']
df_insert_latency = df.loc[df['op_type'] == 'INSERT']

# Cache metrics for READ rows: total lookups and hit rate in percent.
df_read['user_cache_total'] = df_read['user_cache_hits'] + df_read['user_cache_misses']
# Hit rate is left as NaN where there were no lookups at all.
df_read['user_cache_hit_rate'] = np.where(
    df_read['user_cache_total'] > 0,
    df_read['user_cache_hits'] / df_read['user_cache_total'] * 100,
    np.nan,
)

# Cache usage as a time x client table (mean per cell, gaps filled with 0),
# which is the shape needed for the stacked-area subplot.
df_cache = df_read.loc[:, ['relative_time_s', 'client_id', 'user_cache_usage']].dropna()
pivot_cache = pd.pivot_table(
    df_cache,
    index='relative_time_s',
    columns='client_id',
    values='user_cache_usage',
    aggfunc='mean',
).fillna(0)

# Create the figure: 13 vertically stacked subplots sharing one x axis.
# Titles are listed in row order (row 1 first).
subplot_titles = [
    f"Per-Client READ Throughput for read size {READ_SIZE_MB*1024:.0f}KB",
    "Per-Client WRITE Throughput (INSERT & INSERT_BATCH)",
    "Combined READ & WRITE Throughput",
]
subplot_titles += [
    f"Per-Client READ {stat} Latency" for stat in ("P10", "P25", "P50", "P99", "Max")
]
subplot_titles += [
    "Per-Client INSERT P99 Latency",
    "Per-Client QUEUE P99 Latency",
    "Per-Client User Cache Hit Rate",
    "Per-Client User Cache Usage (Stacked)",
    "SSD Throughput Over Time",
]
fig = make_subplots(
    rows=13,
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.01,
    subplot_titles=subplot_titles,
)

def add_group(df_group, row, ycol, name_fmt):
    """Add one line trace per client to a subplot of the module-level ``fig``.

    Args:
        df_group: DataFrame with ``client_id`` and ``relative_time_s`` columns
            plus the column named by ``ycol``.
        row: 1-based subplot row to draw into (column is always 1).
        ycol: Name of the column plotted on the y axis.
        name_fmt: ``str.format`` template for the trace name; receives the
            client id as its single positional argument.
    """
    for client, samples in df_group.groupby('client_id'):
        trace = go.Scatter(
            x=samples['relative_time_s'],
            y=samples[ycol],
            name=name_fmt.format(client),
        )
        fig.add_trace(trace, row=row, col=1)

# Populate subplots 1–12 (traces are added in row order).

# Rows 1-2: per-client throughput, reads on row 1 and both insert kinds on row 2.
for frame, target_row, label in (
    (df_read, 1, "Client {}"),
    (df_insert, 2, "Client {} INSERT"),
    (df_insert_batch, 2, "Client {} INSERT_BATCH"),
):
    add_group(frame, target_row, 'throughput_mb_s', label)

# Row 3: every (client, operation) throughput stream on a single axis.
for (cid, op), grp in df_th.groupby(['client_id', 'op_type']):
    combined_trace = go.Scatter(
        x=grp['relative_time_s'],
        y=grp['throughput_mb_s'],
        name=f"Client {cid}, {op}",
    )
    fig.add_trace(combined_trace, row=3, col=1)

# Rows 4-8: READ latency, one row per statistic column.
for row_num, col_name in enumerate(('10p', '25p', '50p', '99p', 'max'), start=4):
    add_group(df_read_latency, row_num, col_name, "Client {}")

# Rows 9-11: INSERT P99, QUEUE P99 and the READ cache hit rate.
for frame, target_row, ycol in (
    (df_insert_latency, 9, '99p'),
    (df_queue, 10, '99p'),
    (df_read, 11, 'user_cache_hit_rate'),
):
    add_group(frame, target_row, ycol, "Client {}")

# Row 12: stacked cache usage per client, scaled down by 1024**2
# (presumably bytes -> MB, matching the axis label — confirm).
for cid in pivot_cache.columns:
    usage_trace = go.Scatter(
        x=pivot_cache.index,
        y=pivot_cache[cid] / 1024**2,
        name=f"Client {cid}",
        stackgroup="one",
    )
    fig.add_trace(usage_trace, row=12, col=1)

# === SSD throughput subplot (row 13) — only the first 1/5th of the samples ===
df_iostat = pd.read_csv("iostat_results.csv")
cutoff = len(df_iostat) // 5
# One x unit per iostat row, offset by the earliest relative time in df
# (presumably iostat sampled once per second — confirm).
time_seconds = df['relative_time_s'].min() + np.arange(cutoff)
for iostat_col, trace_name in (("rMB/s", "SSD Read MB/s"), ("wMB/s", "SSD Write MB/s")):
    ssd_trace = go.Scatter(
        x=time_seconds,
        y=df_iostat[iostat_col].iloc[:cutoff],
        name=trace_name,
        mode="lines+markers",
    )
    fig.add_trace(ssd_trace, row=13, col=1)

# Axis labels: one y-axis title per subplot row, listed in row order
# (3 throughput rows, 7 latency rows, then hit rate, cache usage, SSD).
y_labels = (
    ['Throughput (MB/s)'] * 3
    + ['Latency (ms)'] * 7
    + ['Hit Rate (%)', 'Cache Usage (MB)', 'MB/s']
)
for row_idx, axis_title in zip(range(1, len(y_labels) + 1), y_labels):
    fig.update_yaxes(title_text=axis_title, row=row_idx, col=1)

# NOTE(review): only row 5 gets a fixed y range of 0-400; confirm this is the
# intended subplot for the clamp.
fig.update_yaxes(range=[0, 400], row=5, col=1)
# Only the bottom subplot carries the x-axis title.
fig.update_xaxes(title_text="Time (s)", row=13, col=1)

fig.update_layout(height=3900, width=1000, showlegend=False)
fig.show()
