In [50]:
import csv
import matplotlib.pyplot as plt
import random
from typing import Optional

# Length of one client statistics reporting window. plot_client_results divides
# each row's operation count by the value in seconds to get a per-second rate.
# NOTE(review): presumably this must match the dump interval configured in the
# benchmark client -- confirm before comparing runs.
stat_dump_interval_ms = 500
stat_dump_interval_s = stat_dump_interval_ms / 1000

def plot_client_results(output_file, xlim, version: Optional[str] = None):
    """Plot per-client throughput and latency percentiles from the client stats CSV.

    Reads ``logs/client_stats.log`` (or ``results/<version>/client_stats.log``
    when ``version`` is given), groups the rows by client id and shows three
    interactive Plotly scatter plots: throughput, p50 latency and p99 latency.

    Args:
        output_file: Unused; kept so existing callers keep working.
        xlim: x-axis range handed to Plotly, in seconds relative to the first
            data row.
        version: Name of the ``results/`` sub-directory to read from, or
            ``None`` to read from ``logs/``.

    Returns:
        The timestamp of the first data row converted to seconds, or ``None``
        when the file contains no data rows.
    """
    import plotly.graph_objects as go

    if version is None:
        input_file_path = 'logs/client_stats.log'
    else:
        input_file_path = f'results/{version}/client_stats.log'

    # Throughput model: every counted entry is scaled by a per-op-type factor
    # and by 16 KiB, then converted to MiB per second.
    # NOTE(review): presumably 16 KiB is the value size and the factors are the
    # batch size / scan length used by the benchmark -- confirm.
    bytes_per_op = 16 * 1024
    ops_per_entry = {'INSERT_BATCH': 1000, 'SCAN': 100}

    client_data = {}
    start_time = None

    with open(input_file_path, 'r') as file:
        reader = csv.reader(file)
        next(reader, None)  # Skip the header row; tolerate a completely empty file.

        for row in reader:
            if not row:
                continue  # csv.reader yields [] for blank lines.
            timestamp_str, client_id, op_type, count, _max, _min, _avg, p50, _p90, p99, p999 = row
            timestamp_s = int(timestamp_str) / 1000
            if start_time is None:
                start_time = timestamp_s

            stats = client_data.setdefault(
                client_id,
                {'Tput': [], '50': [], '99': [], '999': [], 'ts': [], 'count': []},
            )

            multiplier = ops_per_entry.get(op_type, 1)
            stats['Tput'].append(multiplier * int(count) * bytes_per_op / (1024 * 1024) / stat_dump_interval_s)
            # Percentiles are divided by 1000 and labelled as milliseconds below.
            stats['50'].append(float(p50) / 1000)
            stats['99'].append(float(p99) / 1000)
            stats['999'].append(float(p999) / 1000)
            stats['ts'].append(timestamp_s)
            stats['count'].append(int(count))

    def plot_latency_heatmap(metric, title, ylabel):
        """Draw one matplotlib heatmap row per client for ``metric`` (optional view)."""
        import numpy as np

        if not client_data:
            return  # Nothing to draw; plt.subplots(0, 1) would raise.

        # Shared, sorted time axis (seconds since the first row) and a lookup
        # from each time point to its heatmap column.
        rel_times = sorted({ts - start_time for stats in client_data.values() for ts in stats['ts']})
        column_of = {ts: col for col, ts in enumerate(rel_times)}
        client_ids = list(client_data.keys())

        # squeeze=False keeps `axs` indexable even when there is a single client.
        fig, axs = plt.subplots(len(client_ids), 1, figsize=(16, len(client_ids) * 1.5),
                                sharex=True, squeeze=False)
        axs = axs[:, 0]
        fig.suptitle(title)

        # One colour scale shared by all clients.
        all_latencies = [latency for stats in client_data.values() for latency in stats[metric]]
        vmin, vmax = min(all_latencies), max(all_latencies)
        epsilon = (vmax - vmin) * 0.05  # Offset used to push missing samples below vmin.

        heatmap = None
        for ax, client_id in zip(axs, client_ids):
            stats = client_data[client_id]

            # Start from NaN and fill in the samples this client actually has.
            latency_row = np.full(len(rel_times), np.nan)
            for ts, latency in zip(stats['ts'], stats[metric]):
                col = column_of[ts - start_time]
                if np.isnan(latency_row[col]):  # Keep the first sample per time point.
                    latency_row[col] = latency

            # Missing samples get a value slightly lower than vmin.
            latency_row = np.nan_to_num(latency_row, nan=(vmin - epsilon))

            heatmap = ax.imshow([latency_row], aspect='auto', cmap='viridis', vmin=vmin, vmax=vmax)
            ax.set_ylabel(f'Client {client_id}')
            ax.yaxis.set_label_position("left")
            ax.set_yticks([])
            ax.set_xlim(-0.5, len(rel_times) - 0.5)  # Show every time-point column.

        fig.colorbar(heatmap, ax=axs, orientation='horizontal', label=ylabel)
        axs[-1].set_xlabel('Time Points')
        plt.show()

    def plot_latency_interactive(metric, title, ylabel, xlim):
        """Show an interactive scatter plot of ``metric`` with one trace per client."""
        fig = go.Figure()

        for client_id, stats in client_data.items():
            fig.add_trace(go.Scatter(
                x=[ts - start_time for ts in stats['ts']],
                y=list(stats[metric]),
                mode='markers',
                marker=dict(size=6),
                name=f'Client {client_id}'
            ))

        fig.update_layout(
            title=title,
            xaxis_title='Time (s)',
            yaxis_title=ylabel,
            template='plotly_white',
            xaxis=dict(range=xlim),
        )

        fig.show()

    plot_latency_interactive('Tput', 'Client Throughput', 'MB/s', xlim=xlim)
    plot_latency_interactive('50', 'Latency: 50p', '50th Percentile (ms)', xlim=xlim)
    plot_latency_interactive('99', 'Latency: 99p', '99th Percentile (ms)', xlim=xlim)
    # plot_latency_interactive('count', 'Operation Count', 'Count', xlim=xlim)

    # plot_latency_heatmap('50', 'Latency: 50p', '50th Percentile (ms)')
    # plot_latency_heatmap('99', 'Latency: 99p', '99th Percentile (ms)')
    return start_time


In [51]:
import matplotlib.pyplot as plt
import re
from datetime import datetime

def plot_memtable_stats(output_file, start_time, xlim, version: Optional[str]=None):
    """Plot a running memtable count per column family from the RocksDB LOG.

    Scans the LOG for ``mt,<cf_name>,<add|remove>`` lines, keeps a running
    count per column family (+1 for ``add``, -1 for ``remove``, starting at 0)
    and shows the counts as an interactive Plotly step chart.

    Args:
        output_file: Unused; kept so existing callers keep working.
        start_time: Reference time in seconds; subtracted from every parsed
            timestamp and used as the x position of each series' initial 0.
        xlim: x-axis range handed to Plotly.
        version: Name of the ``results/`` sub-directory holding ``LOG``, or
            ``None`` to read the default LOG path.
    """
    import plotly.graph_objects as go

    if version is None:
        input_file_path = '/home/windsey/ycsb-rocksdb-data/LOG'
    else:
        input_file_path = f'results/{version}/LOG'

    memtable_pattern = re.compile(r'(\d{4}/\d{2}/\d{2}-\d{2}:\d{2}:\d{2}).\d{6} \d+ .*.cc:\d+\] mt,([^,]+),([^,]+).*')
    epoch = datetime(1970, 1, 1)

    def timestamp_to_seconds(timestamp_str):
        # Only whole seconds are captured by the pattern above.
        # NOTE(review): the timestamp is parsed as a naive datetime and measured
        # from 1970-01-01 -- confirm it shares a time base with `start_time`.
        timestamp = datetime.strptime(timestamp_str.strip(), '%Y/%m/%d-%H:%M:%S')
        return (timestamp - epoch).total_seconds()

    count_delta = {'add': 1, 'remove': -1}
    cf_data = {}

    with open(input_file_path, 'r') as file:
        for line in file:
            memtable_match = memtable_pattern.match(line)
            if not memtable_match:
                continue

            ts, cf_name, operation = memtable_match.group(1, 2, 3)
            delta = count_delta.get(operation.strip())
            if delta is None:
                # Skip the whole record so "ts" and "counts" stay the same length.
                print('invalid memtable operation name')
                continue

            # Every series starts with a count of 0 at start_time, so the plot
            # effectively tracks the change relative to the start.
            series = cf_data.setdefault(cf_name, {"counts": [0], "ts": [start_time]})
            series["ts"].append(timestamp_to_seconds(ts))
            series["counts"].append(series["counts"][-1] + delta)

    all_cfs = ['default', 'cf2', 'cf3', 'cf4']
    fig = go.Figure()

    for idx, (cf_name, stats) in enumerate(cf_data.items()):
        # Known column families map to a fixed client number; others fall back
        # to their position in the parsed data.
        label = f'Client {all_cfs.index(cf_name)}' if cf_name in all_cfs else f'Client {idx}'

        # shape='hv' holds each count until the next event and then jumps,
        # which draws the step chart without duplicating points by hand.
        fig.add_trace(go.Scatter(
            x=[ts - start_time for ts in stats["ts"]],
            y=[int(x) for x in stats["counts"]],
            mode='lines',
            name=label,
            line=dict(shape='hv')
        ))

    fig.update_layout(
        title="Memtable Counts Over Time",
        xaxis_title="Time (s)",
        yaxis_title="Memtable Count",
        legend_title="Clients",
        xaxis=dict(showgrid=True, range=xlim),
        yaxis=dict(showgrid=True)
    )

    fig.show()

def plot_level_stats(output_file, axs, fig_loc, version: Optional[str]=None):
    """Plot the cumulative share of hits served by levels l0..l3.

    Scans the RocksDB LOG for ``rocksdb.l<N>.hit COUNT : <n>`` lines (N in
    0..3). When a level appears more than once the last value wins. The
    cumulative percentages are drawn on ``axs[fig_loc[0]]``.

    Args:
        output_file: Unused; kept so existing callers keep working.
        axs: Indexable collection of matplotlib axes.
        fig_loc: Sequence whose first element selects the axes in ``axs``.
        version: Name of the ``results/`` sub-directory holding ``LOG``, or
            ``None`` (default) to read the default LOG path.
    """
    if version is None:
        input_file_path = '/home/windsey/ycsb-rocksdb-data/LOG'
    else:
        input_file_path = f'results/{version}/LOG'

    level_names = ["l0", "l1", "l2", "l3"]
    level_hit_pattern = re.compile(r'rocksdb.l(0|1|2|3).hit COUNT : (\d+).*')

    # Levels that never show up in the log count as zero hits instead of
    # breaking the summation below.
    hits = [0] * len(level_names)
    with open(input_file_path, 'r') as file:
        for line in file:
            level_hit_match = level_hit_pattern.match(line)
            if not level_hit_match:
                continue
            level, new_hits = level_hit_match.group(1, 2)
            # Always overwrite: later statistics dumps supersede earlier ones.
            hits[int(level)] = int(new_hits)

    cumulative_hits = []
    cumulative_count = 0
    for hit in hits:
        cumulative_count += hit
        cumulative_hits.append(cumulative_count)

    if cumulative_count:
        cumulative_hits_prop = [x / cumulative_count * 100 for x in cumulative_hits]
    else:
        # No hits recorded at all: draw a flat 0% line rather than dividing by zero.
        cumulative_hits_prop = [0.0] * len(level_names)

    ax = axs[fig_loc[0]]
    ax.plot(level_names, cumulative_hits_prop, marker='o')
    ax.set_title("Level Hits CDF")
    ax.grid(True)
    ax.set_ylabel('Cumulative Hits (%)')


In [52]:
import matplotlib.pyplot as plt
import re
import json
from datetime import datetime

def get_compaction_color(cf_name, level):
    """Return the hex colour used to draw a compaction for a column family.

    Each known column family owns one hue (default: red, cf2: blue,
    cf3: orange, cf4: green) in seven shades from light to dark. Levels 1-6
    select the first six shades; any other level gets the darkest shade.
    Unknown column families yield ``None``.
    """
    palettes = (
        ("default", ("#fc9598", "#ff696e", "#f02225", "#cf1d20", "#ab1619", "#851114", "#610c0e")),
        ("cf2", ("#95c2fc", "#699eff", "#225bf0", "#1d4bcf", "#1638ab", "#112985", "#0c1d61")),
        ("cf3", ("#ffd1b3", "#ffa366", "#ff7519", "#cc5c14", "#993d0f", "#66260a", "#331305")),
        ("cf4", ("#c9fcb2", "#9ff987", "#75f35d", "#58d740", "#46b334", "#328028", "#21571d")),
    )

    for family, shades in palettes:
        if cf_name != family:
            continue
        # Shades 0..5 belong to levels 1..6; the last shade is the fallback.
        for depth, shade in enumerate(shades[:-1], start=1):
            if level == depth:
                return shade
        return shades[-1]

    return None


def plot_rocksdb_events(output_file, experiment_start_time, xlim, version: Optional[str]=None):
    """Plot RocksDB stalls, stops, flushes and compactions on one time line.

    Parses the RocksDB LOG for write-stall warnings (level-0, immutable
    memtable, pending compaction bytes), write-stop warnings, flush summaries
    and ``compaction_finished`` JSON events, then shows them in a single
    interactive Plotly figure with rate on the y axis.

    Args:
        output_file: Unused; kept so existing callers keep working.
        experiment_start_time: Reference time in seconds; subtracted from every
            event time to obtain the x coordinate.
        xlim: x-axis range handed to Plotly.
        version: Name of the ``results/`` sub-directory holding ``LOG``, or
            ``None`` to read the default LOG path.
    """
    # Imported locally so the function does not depend on another cell having
    # put `go` into the notebook namespace first.
    import plotly.graph_objects as go

    if version is None:
        log_file_path = '/home/windsey/ycsb-rocksdb-data/LOG'
    else:
        log_file_path = f'results/{version}/LOG'

    flush_regex = re.compile(
        r'(\d{4}/\d{2}/\d{2}-\d{2}:\d{2}:\d{2}\.\d{6}) \d+ \[/flush_job\.cc:\d+\] \[(.*?)\] \[JOB \d+\] Flush: (\d+) microseconds, \d+ cpu microseconds, (\d+) bytes'
    )
    l0_stall_pattern = re.compile(r'(\d{4}/\d{2}/\d{2}-\d{2}:\d{2}:\d{2}.\d{6}) \d+ \[WARN\] \[/column_family.cc:\d+\] \[([^,]+)\] Stalling writes because we have \d+ level-0 files rate (\d+)')
    memtable_stall_pattern = re.compile(r'(\d{4}/\d{2}/\d{2}-\d{2}:\d{2}:\d{2}.\d{6}) \d+ \[WARN\] \[/column_family.cc:\d+\] \[([^,]+)\] Stalling writes because we have \d+ immutable memtables.*rate (\d+)')
    pending_compaction_stall_pattern = re.compile(r'(\d{4}/\d{2}/\d{2}-\d{2}:\d{2}:\d{2}.\d{6}) \d+ \[WARN\] \[/column_family.cc:\d+\] \[([^,]+)\] Stalling writes because of estimated pending compaction bytes \d+ rate (\d+)')
    memtable_stop_pattern = re.compile(r'(\d{4}/\d{2}/\d{2}-\d{2}:\d{2}:\d{2}.\d{6}) \d+ \[WARN\] \[/column_family.cc:\d+\] \[([^,]+)\] Stopping writes because we have \d+ immutable memtables.*')
    level0_stop_pattern = re.compile(r'(\d{4}/\d{2}/\d{2}-\d{2}:\d{2}:\d{2}\.\d{6}) \d+ \[WARN\] \[/column_family\.cc:\d+\] \[([^\]]+)\] Stopping writes because we have \d+ level-0 files.*')
    compaction_regex = re.compile(r'.*EVENT_LOG_v1 (.*)$')

    epoch = datetime(1970, 1, 1)
    timestamp_format = '%Y/%m/%d-%H:%M:%S.%f'

    # NOTE(review): log-line timestamps are parsed as naive datetimes measured
    # from 1970-01-01, while compaction events use the JSON `time_micros`
    # field -- confirm both share a time base with `experiment_start_time`.
    def timestamp_to_seconds(timestamp_str):
        return (datetime.strptime(timestamp_str, timestamp_format) - epoch).total_seconds()

    def timestamp_to_micros(timestamp_str):
        return int(timestamp_to_seconds(timestamp_str) * 1000000)

    # Stall records: (timestamp_micros, rate); the logged rate is divided by 1024 twice.
    l0_stalls, memtable_stalls, pending_compaction_stalls = [], [], []
    # Stop records: (timestamp_micros, cf_name).
    memtable_stops, level0_stops = [], []
    # cf_name -> [(start_s, rate_MB_s, duration_s)]
    flush_data = {}
    # cf_name -> [(start_s, end_s, read_rate, write_rate, output_level)]
    compaction_data = {}
    # Largest rate seen anywhere; used as the height of the vertical stop lines.
    max_rate = 0

    stall_sinks = (
        (l0_stall_pattern, l0_stalls),
        (memtable_stall_pattern, memtable_stalls),
        (pending_compaction_stall_pattern, pending_compaction_stalls),
    )
    stop_sinks = (
        (memtable_stop_pattern, memtable_stops),
        (level0_stop_pattern, level0_stops),
    )

    with open(log_file_path, 'r') as log_file:
        for line in log_file:
            # Write stalls.
            for pattern, sink in stall_sinks:
                stall_match = pattern.search(line)
                if stall_match:
                    timestamp_str, _cf_name, rate = stall_match.groups()
                    rate_mb_s = int(rate) / 1024 / 1024
                    sink.append((timestamp_to_micros(timestamp_str), rate_mb_s))
                    max_rate = max(max_rate, rate_mb_s)

            # Write stops.
            for pattern, sink in stop_sinks:
                stop_match = pattern.search(line)
                if stop_match:
                    timestamp_str, cf_name = stop_match.groups()
                    sink.append((timestamp_to_micros(timestamp_str), cf_name))

            # Flush summaries: the line's timestamp is treated as the end of the
            # flush, so the start is the timestamp minus the duration.
            flush_match = flush_regex.match(line)
            if flush_match:
                timestamp_str, cf_name, flush_microseconds, flush_bytes = flush_match.groups()
                flush_micros = int(flush_microseconds)
                start_time_seconds = timestamp_to_seconds(timestamp_str) - flush_micros / 1e6
                if flush_micros > 0:
                    rate_MB_s = (int(flush_bytes) / flush_micros) * 1e6 / (1024**2)
                else:
                    rate_MB_s = 0.0  # A zero-length flush has no meaningful rate.
                flush_data.setdefault(cf_name, []).append((start_time_seconds, rate_MB_s, flush_micros / 1e6))
                max_rate = max(max_rate, rate_MB_s)

            # Compaction events (JSON payload after EVENT_LOG_v1).
            compaction_match = compaction_regex.match(line)
            if compaction_match:
                try:
                    event_data = json.loads(compaction_match.group(1))
                    if event_data['event'] != 'compaction_finished':
                        continue
                    end_time_seconds = event_data['time_micros'] / 1e6
                    start_time_seconds = end_time_seconds - event_data['compaction_time_micros'] / 1e6
                    read_rate = event_data['read_rate']
                    write_rate = event_data['write_rate']
                    output_level = event_data['output_level']
                    cf_name = event_data['cf_name']
                    max_rate = max(max_rate, write_rate, read_rate)
                except (ValueError, KeyError, TypeError):
                    # Malformed JSON or an event without the expected fields.
                    print("Compaction json error")
                    continue
                compaction_data.setdefault(cf_name, []).append(
                    (start_time_seconds, end_time_seconds, read_rate, write_rate, output_level))

    print(f'Exp start time: {experiment_start_time}')

    fig = go.Figure()

    # Stalls: one marker per warning, y = throttled rate as a whole number.
    stall_series = (
        (l0_stalls, 'L0 Stalls', 'blue'),
        (memtable_stalls, 'Memtable Stalls', 'purple'),
        (pending_compaction_stalls, 'Pending Compaction Stalls', 'orange'),
    )
    for stalls, name, color in stall_series:
        fig.add_trace(go.Scatter(
            x=[(timestamp_micros / 1e6) - experiment_start_time for timestamp_micros, _ in stalls],
            y=[int(rate) for _, rate in stalls],
            mode='markers', name=name,
            marker=dict(color=color, size=6)
        ))

    # Flushes: one horizontal segment per flush at its rate.
    flush_styles = {
        'default': ('red', 'Client 0 Flush'),
        'cf2': ('blue', 'Client 1 Flush'),
        'cf3': ('orange', 'Client 2 Flush'),
        'cf4': ('green', 'Client 3 Flush'),
    }
    for cf_name, flushes in flush_data.items():
        # Unknown column families get Plotly's default colour and a generic
        # label instead of reusing the previous iteration's style.
        color, label = flush_styles.get(cf_name, (None, f'Columnfamily {cf_name} Flush'))
        x_values = []
        y_values = []
        for start_time, rate, duration in flushes:
            # None breaks the line between events.
            x_values.extend([start_time - experiment_start_time, start_time + duration - experiment_start_time, None])
            y_values.extend([rate, rate, None])

        fig.add_trace(go.Scatter(
            x=x_values, y=y_values,
            mode="lines+markers",
            line=dict(color=color, width=4, dash="solid"),
            marker=dict(symbol="circle", size=6),
            name=label,
            showlegend=True
        ))

    # Compactions: one horizontal segment per compaction at its write rate.
    all_cfs = ['default', 'cf2', 'cf3', 'cf4']
    for cf_name, compactions in compaction_data.items():
        label = f'Client {all_cfs.index(cf_name)} Compaction' if cf_name in all_cfs else f'Columnfamily {cf_name} Compaction'
        # All events of a column family share one trace and therefore one
        # colour; it is picked from the deepest output level of that family.
        deepest_level = max(event[4] for event in compactions)
        color = get_compaction_color(cf_name, deepest_level)
        x_values = []
        y_values = []
        for start_time, end_time, _read_rate, write_rate, _level in compactions:
            x_values.extend([start_time - experiment_start_time, end_time - experiment_start_time, None])
            y_values.extend([write_rate, write_rate, None])

        fig.add_trace(go.Scatter(
            x=x_values, y=y_values,
            mode="lines+markers",
            line=dict(color=color, width=3, dash="dash"),
            marker=dict(symbol="x", size=6),
            name=label,
            showlegend=True
        ))

    def add_stop_traces(stops, color, name_prefix):
        """Add one trace of full-height vertical lines per column family."""
        # sorted() keeps the legend order stable between runs.
        for cf_name in sorted(set(stop_cf for _, stop_cf in stops)):
            all_stop_x = []
            all_stop_y = []
            for stop_ts, current_cf_name in stops:
                if current_cf_name == cf_name:
                    stop_s = (stop_ts / 1e6) - experiment_start_time
                    all_stop_x.extend([stop_s, stop_s, None])  # None breaks the line.
                    all_stop_y.extend([0, max_rate, None])

            fig.add_trace(go.Scatter(
                x=all_stop_x,
                y=all_stop_y,
                mode="lines",
                line=dict(color=color, width=1, dash="dash"),
                showlegend=True,
                name=f"{name_prefix} {cf_name}"
            ))

    add_stop_traces(memtable_stops, "brown", "Memtable Stop")
    add_stop_traces(level0_stops, "purple", "L0 File Stop")

    fig.update_layout(
        title="Database Operations Over Time",
        xaxis=dict(title="Time (seconds since start of experiment)", range=xlim),
        yaxis=dict(title="Rate (MB/s)"),
        legend=dict(x=0.02, y=0.98),
    )

    fig.show()


In [53]:
# Overall throughputs (client + system)

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from datetime import datetime
import time

# NOTE(review): presumably the storage device's peak read/write throughput and
# IOPS, intended for normalising the iostat plots into utilisation. None of the
# functions visible in this cell reference them -- confirm units and usage.
max_read_tput = 700
max_read_iops = 180000
max_write_tput = 400
max_write_iops = 100000

def timestamp_to_seconds(timestamp_str):
  """Convert a ``YYYY-MM-DD HH:MM:SS.ffffff`` string to seconds since 1970-01-01.

  Trailing whitespace (such as a newline) is ignored. The value is parsed as a
  naive datetime, so no time-zone conversion is applied.
  """
  unix_epoch = datetime(1970, 1, 1)
  parsed = datetime.strptime(timestamp_str.rstrip(), '%Y-%m-%d %H:%M:%S.%f')
  elapsed = parsed - unix_epoch
  return elapsed.total_seconds()

def plot_overall_tputs(output_file, start_time_shift, xlim, version: Optional[str]=None):
  """Plot device read and write throughput (MB/s) from the iostat CSV.

  Args:
    output_file: Unused; kept so existing callers keep working.
    start_time_shift: Offset added to every sample's x position.
    xlim: x-axis range handed to Plotly.
    version: Name of the ``results/`` sub-directory holding
      ``iostat_results.csv``, or ``None`` to read it from the working directory.
  """
  if version is None:
    csv_path = "iostat_results.csv"
  else:
    csv_path = f'results/{version}/iostat_results.csv'
  df_from_csv = pd.read_csv(csv_path)

  # Rows are placed one x unit apart, starting at start_time_shift.
  # NOTE(review): this assumes iostat sampled once per second -- confirm.
  time_seconds = np.arange(len(df_from_csv)) + start_time_shift

  fig = go.Figure()

  for column, name, color in (("rMB/s", 'Read MB/s', 'green'), ("wMB/s", 'Write MB/s', 'red')):
    fig.add_trace(go.Scatter(
        x=time_seconds, y=df_from_csv[column],
        mode='lines+markers',
        name=name,
        marker=dict(symbol='circle', color=color)
    ))

  fig.update_layout(
      title="SSD Throughput Over Time",
      xaxis=dict(title="Time (s)", range=xlim),
      yaxis=dict(title="MB/s"),
      legend=dict(x=0.02, y=0.98),  # Position legend in the top-left corner
  )

  fig.show()

def plot_overall_iops(output_file, start_time_shift, xlim, version: Optional[str]=None):
  """Plot device read and write IOPS from the iostat CSV.

  Args:
    output_file: Unused; kept so existing callers keep working.
    start_time_shift: Offset added to every sample's x position.
    xlim: x-axis range handed to Plotly.
    version: Name of the ``results/`` sub-directory holding
      ``iostat_results.csv``, or ``None`` to read it from the working directory.
  """
  if version is None:
    csv_path = "iostat_results.csv"
  else:
    csv_path = f'results/{version}/iostat_results.csv'
  df_from_csv = pd.read_csv(csv_path)

  # Rows are placed one x unit apart, starting at start_time_shift.
  # NOTE(review): this assumes iostat sampled once per second -- confirm.
  time_seconds = np.arange(len(df_from_csv)) + start_time_shift

  fig = go.Figure()

  for column, name, color in (("r/s", 'Read IOPS', 'green'), ("w/s", 'Write IOPS', 'red')):
    fig.add_trace(go.Scatter(
        x=time_seconds,
        y=df_from_csv[column],
        mode='lines+markers',
        name=name,
        marker=dict(symbol='circle', color=color)
    ))

  # The y axis always spans at least 0..16 so an idle device is still readable.
  iops_ceiling = max(df_from_csv[["r/s", "w/s"]].max().max(), 16)

  fig.update_layout(
      title="SSD IOPS Over Time",
      xaxis=dict(title="Time (s)", range=xlim),
      yaxis=dict(title="IOPS", range=[0, iops_ceiling]),
      # Secondary axis is configured but no trace is attached to it here.
      yaxis2=dict(title="Utilization (based on IOPS)", overlaying='y', side='right', range=[0, 1]),
      legend_title="Metrics",
  )
  fig.show()

import plotly.graph_objects as go

def plot_io_waittimes(output_file, start_time_shift, xlim, version: Optional[str]=None):
  """Plot read wait time per request and per KB from the iostat CSV.

  The per-KB series is ``r_await / rareq-sz`` for samples whose average read
  size is positive; other samples fall back to the plain ``r_await`` value.

  Args:
    output_file: Unused; the figure is only shown, not written to disk.
    start_time_shift: Offset added to every sample's x position.
    xlim: x-axis range handed to Plotly.
    version: Name of the ``results/`` sub-directory holding
      ``iostat_results.csv``, or ``None`` to read it from the working directory.
  """
  if version is None:
    csv_path = "iostat_results.csv"
  else:
    csv_path = f'results/{version}/iostat_results.csv'
  df_from_csv = pd.read_csv(csv_path)

  # Rows are placed one x unit apart, starting at start_time_shift.
  # NOTE(review): this assumes iostat sampled once per second -- confirm.
  time_seconds = np.arange(len(df_from_csv)) + start_time_shift

  # Vectorised per-KB wait: keep the quotient where the request size is
  # positive, otherwise use the per-request wait unchanged.
  read_await = df_from_csv["r_await"]
  read_size = df_from_csv["rareq-sz"]
  read_await_per_kb = (read_await / read_size).where(read_size > 0, read_await)

  fig = go.Figure()

  # Read await per request on the primary y axis.
  fig.add_trace(go.Scatter(
      x=time_seconds, y=read_await,
      mode='lines+markers',
      name='Read Await (per req)',
      marker=dict(symbol='circle', color='green')
  ))

  # Read await per KB on the secondary y axis.
  fig.add_trace(go.Scatter(
      x=time_seconds, y=read_await_per_kb,
      mode='lines+markers',
      name='Read Await (per KB)',
      marker=dict(symbol='x', color='darkgreen'),
      yaxis="y2"
  ))

  fig.update_layout(
      title="IO Wait Times (Queueing + Servicing)",
      xaxis=dict(title="Time (s)", range=xlim),
      yaxis=dict(title="Wait Time (ms)"),
      yaxis2=dict(title="IO Wait Times per KB", overlaying='y', side='right'),
      legend=dict(x=0.02, y=0.98),  # Position legend in the top-left corner
  )

  fig.show()

def plot_io_reqsize(output_file, start_time_shift, xlim, version: Optional[str]=None):
    """Plot average read and write request sizes from the iostat CSV.

    Args:
        output_file: Unused; kept so existing callers keep working.
        start_time_shift: Offset added to every sample's x position.
        xlim: x-axis range handed to Plotly.
        version: Name of the ``results/`` sub-directory holding
            ``iostat_results.csv``, or ``None`` to read it from the working
            directory.
    """
    if version is None:
        csv_path = "iostat_results.csv"
    else:
        csv_path = f'results/{version}/iostat_results.csv'
    df_from_csv = pd.read_csv(csv_path)

    # Rows are placed one x unit apart, starting at start_time_shift.
    # NOTE(review): this assumes iostat sampled once per second -- confirm.
    time_seconds = np.arange(len(df_from_csv)) + start_time_shift

    fig = go.Figure()

    for column, name, color in (("rareq-sz", 'Avg Read Size', 'green'), ("wareq-sz", 'Avg Write Size', 'red')):
        fig.add_trace(go.Scatter(
            x=time_seconds, y=df_from_csv[column],
            mode='lines+markers',
            name=name,
            marker=dict(symbol='circle', color=color)
        ))

    fig.update_layout(
        title="Avg IO Sizes Over Time",
        xaxis=dict(title="Time (s)", range=xlim),
        yaxis=dict(title="Size (KB)"),
        legend=dict(x=0.98, y=0.98, xanchor="right"),  # Position legend in the upper-right corner
    )

    fig.show()

In [54]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime

def plot_cpu_util(output_file, start_time_shift, xlim, version: Optional[str]=None, num_cores: int=16):
    """Plot per-core CPU utilization over time from the mpstat CSV.

    Utilization is computed as ``100 - (iowait + idle)`` for every row whose
    ``core`` label is one of ``"0" .. str(num_cores - 1)``; rows with any other
    label are ignored.

    Args:
        output_file: Not used by this function.
        start_time_shift: Value added to each core's sample index to form the
            x-axis.
        xlim: Range applied to the x-axis.
        version: When None, 'mpstat_results.csv' in the working directory is
            read; otherwise results/<version>/mpstat_results.csv.
        num_cores: Number of cores (labelled from 0) to include. Defaults to 16.

    Raises:
        KeyError: If the CSV lacks one of the columns core, usr, sys, iowait,
            soft or idle.
    """
    # Load data
    if version is None:
        csv_path = 'mpstat_results.csv'
    else:
        csv_path = f'results/{version}/mpstat_results.csv'
    df_from_csv = pd.read_csv(csv_path)

    # Normalise the core label to a stripped string. Going through astype(str)
    # keeps this working when pandas parses the column as integers (a plain
    # x.strip() would raise on non-string values).
    df_from_csv['core'] = df_from_csv['core'].astype(str).str.strip()
    wanted_cores = [str(i) for i in range(num_cores)]
    df_filtered = df_from_csv[df_from_csv['core'].isin(wanted_cores)].copy()

    # Convert metrics to float
    metric_columns = ['usr', 'sys', 'iowait', 'soft', 'idle']
    for column in metric_columns:
        df_filtered[column] = df_filtered[column].astype(float)

    # Calculate CPU utilization as (100 - sum of iowait and idle)
    df_filtered['utilization'] = 100 - (df_filtered['iowait'] + df_filtered['idle'])

    # Time axis: the n-th row of a given core is that core's n-th sample.
    # Numbering rows across the whole frame instead would advance each core's
    # x-values by the number of cores per sample and stretch the axis.
    # (Assumes one row per core per second -- TODO confirm the mpstat interval.)
    df_filtered['time_seconds'] = df_filtered.groupby('core').cumcount() + start_time_shift

    # Create a Plotly figure
    fig = go.Figure()

    # One trace per core, in numeric order so that e.g. core 2 comes before
    # core 10 in the legend (string order would put '10' first).
    per_core = sorted(df_filtered.groupby('core'), key=lambda item: int(item[0]))
    for core, group in per_core:
        fig.add_trace(go.Scatter(
            x=group['time_seconds'],
            y=group['utilization'],
            mode="lines",
            name=f'Core {core} Utilization',
            line=dict(width=2)
        ))

    # Update layout for titles, labels, and axes limits
    fig.update_layout(
        title="CPU Utilization (all but iowait and idle)",
        xaxis=dict(title="Time (seconds)", range=xlim),
        # Slight padding around 0..100 so lines at the extremes stay visible.
        yaxis=dict(title="CPU Utilization (%)", range=[-2, 102]),
        legend=dict(x=0.98, y=0.98, xanchor="right"),
    )

    fig.show()



In [55]:
import csv
import matplotlib.pyplot as plt
import numpy as np
from collections import defaultdict

def _plot_stacked_by_client(ax, timestamps, series_by_client, label_template):
    """Draw one cumulative (stacked) line and filled band per client on ``ax``."""
    # Clients may have logged different numbers of rows; truncate every series
    # to the shortest one so the running sum lines up index by index.
    shortest = min(len(values) for values in series_by_client.values())
    running_total = np.zeros(shortest)

    for client in sorted(series_by_client):
        time_axis = timestamps[client][:shortest]
        stacked = running_total + np.array(series_by_client[client][:shortest])

        ax.plot(time_axis, stacked, label=label_template.format(client=client))
        ax.fill_between(time_axis, running_total, stacked, alpha=0.3)

        running_total = stacked


def plot_rsched_stats(output_file, axs, start_time_s, xlim, fig_loc,
                      csv_file='logs/resource_shares.log'):
    """Plot per-client memtable limits and IO rate limits as stacked areas.

    Each data row of the CSV is expected to hold six fields, in this order:
    timestamp, client_id, write_rate_limit_kbs, read_rate_limit_kbs,
    write_buffer_size_kb, max_write_buffer_number. The write_buffer_size_kb
    field is read but not plotted.

    Args:
        output_file: Not used by this function.
        axs: Indexable collection of matplotlib axes.
        start_time_s: Start time in seconds; it is scaled to microseconds and
            subtracted from each row's timestamp, so timestamps are treated as
            microseconds.
        xlim: x-axis limits applied to all three axes.
        fig_loc: Three indices into ``axs`` selecting the axes for the memtable
            plot, the write-rate plot and the read-rate plot, in that order.
        csv_file: Path of the CSV to read. Defaults to
            'logs/resource_shares.log'.

    Returns:
        None. Returns early, without touching ``axs``, when the file has no
        data rows.

    Raises:
        ValueError: If a non-blank row does not have exactly six fields or a
            numeric field cannot be parsed.
    """
    start_time_us = start_time_s * 1e6

    # Per-client series, keyed by integer client id.
    timestamps = defaultdict(list)
    memtable_counts = defaultdict(list)
    write_limits_mb = defaultdict(list)
    read_limits_mb = defaultdict(list)

    # newline='' is what the csv module asks for when opening files for reading.
    with open(csv_file, 'r', newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # Skip the header row (tolerating an empty file)
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; skip them rather than
                # failing on the tuple unpacking below.
                continue
            (timestamp, client_id, write_rate_limit_kbs, read_rate_limit_kbs,
             _write_buffer_size_kb, max_write_buffer_number) = row
            client = int(client_id)
            # Seconds relative to the supplied start time.
            timestamps[client].append((float(timestamp) - start_time_us) / 1e6)
            memtable_counts[client].append(int(max_write_buffer_number))
            # The limits are divided by 1024 and plotted on axes labelled MB/s.
            write_limits_mb[client].append(int(write_rate_limit_kbs) / 1024)
            read_limits_mb[client].append(int(read_rate_limit_kbs) / 1024)

    # All four dicts are filled together, so one emptiness check covers them.
    if not timestamps:
        return

    # Memtable limits
    ax1 = axs[fig_loc[0]]
    _plot_stacked_by_client(ax1, timestamps, memtable_counts, 'Client {client} #table')
    ax1.set_xlabel('Time (s)')
    ax1.set_ylabel('Max num memtables')
    ax1.set_title('Memtable Limits')
    ax1.legend(loc='upper left')
    ax1.set_xlim(xlim)

    # Write rate limits
    ax2 = axs[fig_loc[1]]
    _plot_stacked_by_client(ax2, timestamps, write_limits_mb, 'Client {client}')
    ax2.set_title('IO Write Rate Limit')
    ax2.set_xlabel('Time (s)')
    ax2.set_ylabel('MB/s')
    ax2.legend()
    ax2.set_xlim(xlim)
    # Fixed y-range; values outside 5..300 will be clipped from view.
    ax2.set_ylim(5, 300)

    # Read rate limits
    ax3 = axs[fig_loc[2]]
    _plot_stacked_by_client(ax3, timestamps, read_limits_mb, 'Client {client}')
    ax3.set_title('IO Read Rate Limit')
    ax3.set_xlabel('Time (s)')
    ax3.set_ylabel('MB/s')
    ax3.legend()
    ax3.set_xlim(xlim)

    plt.tight_layout()
    plt.show()


In [56]:
import time 
from plotly.subplots import make_subplots
import plotly.graph_objects as go

def generate_plots(xlim, output_file, version: Optional[str]=None, start_time_shift=0):
    """Run every plotting function of the notebook, in a fixed order.

    Args:
        xlim: x-axis range forwarded unchanged to every plotting function.
        output_file: Forwarded unchanged to every plotting function.
        version: Forwarded unchanged to every plotting function. Defaults to
            None, the value that used to be hard-coded here.
        start_time_shift: Forwarded to plot_overall_iops, plot_overall_tputs,
            plot_io_waittimes, plot_io_reqsize and plot_cpu_util. Defaults to
            0, the value that used to be hard-coded here.
    """
    # The value returned by plot_client_results is handed to the memtable and
    # RocksDB-event plots as their start time.
    start_time_s = plot_client_results(output_file, xlim, version)
    plot_memtable_stats(output_file, start_time_s, xlim, version)
    plot_rocksdb_events(output_file, start_time_s, xlim, version)

    # The remaining plots take a shift rather than a start time.
    plot_overall_iops(output_file, start_time_shift, xlim, version)
    plot_overall_tputs(output_file, start_time_shift, xlim, version)
    plot_io_waittimes(output_file, start_time_shift, xlim, version)
    plot_io_reqsize(output_file, start_time_shift, xlim, version)
    plot_cpu_util(output_file, start_time_shift, xlim, version)

# x-axis range handed to every plot.
xlim = (0, 650)
# The output path embeds the current Unix time in whole seconds.
output_file = f"results/timeseries_{int(time.time())}.txt"
generate_plots(xlim, output_file)

Exp start time: 1730850657.036
