# Overview

In [14]:
import datetime
import json
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import spearmanr
import typing

import seaborn as sns

# NOTE(review): not referenced in the cells visible here; presumably the number
# of CPU cores available to the experiment -- confirm against later cells.
MAX_CORES = 8

# Root folder of the measured experiment results; the DataFrame-creation cell
# walks its nested sub-folders and strips this prefix when parsing folder names.
base_path = "/workspaces/muBench-experiment/gssi_experiment/gateway_offloading/results/small_experiment_2/"

# CSV with the synthetic/theoretical results (columns such as N_A, R0, Ugw,
# Us1, Us2, Us3) that the measured values are paired with.
pincirollis_data_path = "/workspaces/muBench-experiment/gssi_experiment/gateway_offloading/multi_N25.csv"
synth_df = pd.read_csv(pincirollis_data_path, header=0)
# Last expression of the cell: summary statistics rendered as the cell output.
synth_df.describe()

Unnamed: 0,N_A,N_B,r_Z_A,r_Z_B,r_gw,r_A_s1,r_B_s2,r_B_s3,R0,R0_low,...,Us2,Us2_low,Us2_up,Us3,Us3_low,Us3_up,Ugw,Ugw_low,Ugw_up,sim_time_sec
count,286.0,286.0,286.0,286.0,286.0,286.0,286.0,286.0,286.0,286.0,...,286.0,286.0,286.0,286.0,286.0,286.0,286.0,286.0,286.0,286.0
mean,12.5,12.5,0.01,0.01,90909090000000.0,0.069888,0.191201,0.112263,237.834509,233.186402,...,0.418035,0.408003,0.428067,0.59962,0.58769,0.611551,0.563762,0.552917,0.574606,14.602984
std,7.513146,7.513146,0.0,0.0,287983700000000.0,0.015599,0.121884,0.040743,55.943494,54.858423,...,0.259071,0.252861,0.265292,0.326274,0.321127,0.331465,0.360015,0.354665,0.365403,6.472811
min,0.0,0.0,0.01,0.01,0.1,0.05,0.083333,0.066667,170.697216,166.841761,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.315408
25%,6.0,6.0,0.01,0.01,0.125,0.055556,0.1,0.076923,201.029826,197.440417,...,0.179055,0.174339,0.183529,0.323831,0.315499,0.332041,0.235576,0.230077,0.241032,11.374669
50%,12.5,12.5,0.01,0.01,0.2,0.066667,0.142857,0.1,225.564546,220.526819,...,0.425032,0.414622,0.435246,0.633879,0.62091,0.645304,0.571179,0.557475,0.584883,13.147125
75%,19.0,19.0,0.01,0.01,0.5,0.083333,0.25,0.142857,251.797768,247.405509,...,0.673659,0.655194,0.692124,0.933634,0.913315,0.94722,0.954792,0.936184,0.969459,15.928903
max,25.0,25.0,0.01,0.01,1000000000000000.0,0.1,0.5,0.2,501.360385,492.749283,...,0.803877,0.786286,0.821469,0.997916,0.985493,1.018375,0.999995,0.987481,1.01251,61.272047


# Dataframe creation

In [15]:
# Services whose CPU utilization is recorded; their order fixes the order of
# the per-service columns appended further down.
services = ["gw", "s1", "s2", "s3"]

# Column-name templates for the measured and the synthetic CPU utilization.
real_cpu_key_format = "{service}_real_avg_cpu_utilization"
synth_cpu_key_format = "{service}_synth_cpu_utilization"

# Fixed (non per-service) columns of the results DataFrame.
columns = [
    # Experiment metadata.
    "dashboard_intensity",
    "run_index",
    "replications",
    "trials",
    "cpu_cap",
    # Other metadata.
    "experiment_start",
    "experiment_stop",
    # Latency results.
    "real_avg_latency_ms",
    "synth_latency_ms",
    # Number of readings behind the CPU utilization / latency averages.
    "cpu_utilization_readings",
    "request_delay_readings",
]

# Per-service CPU utilization columns: all measured ones first, then all
# synthetic ones, each group following the order of `services`.
columns += [
    key_format.format(service=service_name)
    for key_format in (real_cpu_key_format, synth_cpu_key_format)
    for service_name in services
]

In [16]:
from gssi_experiment.util.util import (
    iterate_through_nested_folders,
    lies_outside_timewindow,
)


def get_experiment_params(
    folder_name: str, root: typing.Optional[str] = None
) -> tuple:
    """Parses the experiment parameters encoded in a results folder path.

    The first ``len(root)`` characters of ``folder_name`` are cut off, the
    remainder is split on "/" and its first chunk is discarded. Of the
    remaining chunks:

    * chunk 0 is ``<cpu_cap><one char>_<reps>rep_<trials>trials``,
    * chunk 1 is ``<label>_<run_idx>``,
    * chunk 3 starts with ``<step>_``,
    * chunk 4 starts with ``<offload>_``.

    Args:
        folder_name: Path of one results folder located below ``root``.
        root: Prefix to strip from ``folder_name``. Defaults to the
            module-level ``base_path``.

    Returns:
        The tuple ``(cpu_cap, reps, trials, run_idx, step, offload)``, all ints.

    Raises:
        IndexError: If the path has fewer chunks than expected.
        ValueError: If one of the numeric fields cannot be parsed as an int.
    """
    if root is None:
        root = base_path

    # HACK: This entire function.
    # Uses the `folder_name` argument; previously the global loop variable
    # `folder` was read here, so the function only worked inside that loop.
    # NOTE(review): the first chunk after the prefix is dropped -- presumably
    # an empty string caused by a doubled "/" in the yielded paths; confirm.
    folder_chunks = folder_name[len(root) :].split("/")[1:]

    dat = folder_chunks[0].split("_")
    cpu_cap = int(dat[0][:-1])  # Drops the single trailing unit character.
    reps = int(dat[1][: -len("rep")])
    trials = int(dat[2][: -len("trials")])

    run_idx = int(folder_chunks[1].split("_")[1])
    step = int(folder_chunks[3].split("_")[0])
    offload = int(folder_chunks[4].split("_")[0])

    return cpu_cap, reps, trials, run_idx, step, offload


def get_experiment_start_end_time(
    dir_name: str,
) -> typing.Tuple[datetime.datetime, datetime.datetime]:
    """Reads the experiment's time window from ``<dir_name>/metadata.json``.

    Args:
        dir_name: Folder that contains the ``metadata.json`` file.

    Returns:
        ``(start_time, end_time)`` parsed from the "start_time" and
        "end_time" entries of the metadata file.
    """
    # The pattern only matches timestamps whose fractional part is the
    # literal ".000" followed by "Z".
    timestamp_pattern = "%Y-%m-%dT%H:%M:%S.000Z"

    with open(f"{dir_name}/metadata.json", "r", encoding="utf-8") as handle:
        metadata = json.load(handle)

    begin, finish = (
        datetime.datetime.strptime(metadata[key], timestamp_pattern)
        for key in ("start_time", "end_time")
    )
    return begin, finish


def get_utilization_data(
    dir_name: str,
    time_window: typing.Optional[
        typing.Tuple[datetime.datetime, datetime.datetime]
    ] = None,
):
    """Averages the per-service CPU utilization measured during an experiment.

    Reads ``<dir_name>/cpu_utilization.csv``, drops every row that
    ``lies_outside_timewindow`` flags for the experiment's time window, drops
    rows with missing values, and averages the column of each entry in
    ``services``.

    Args:
        dir_name: Results folder holding ``cpu_utilization.csv``.
        time_window: Optional ``(start_time, end_time)`` pair. When omitted it
            is read from the folder's ``metadata.json`` via
            ``get_experiment_start_end_time`` instead of relying on global
            ``start_time`` / ``end_time`` variables set by some other cell.

    Returns:
        ``(data_point, n)``: the list of average utilizations (one per entry
        of ``services``, in that order) and the number of rows averaged.

    Raises:
        KeyError: If the CSV lacks a column for one of the ``services``.
    """
    TIME_FORMAT = "%Y-%m-%d %H:%M:%S"

    if time_window is None:
        time_window = get_experiment_start_end_time(dir_name)
    window_start, window_end = time_window

    exp_df = pd.read_csv(f"{dir_name}/cpu_utilization.csv", header=0)

    # Fail early with an actionable message instead of a bare KeyError: 'gw'.
    missing_columns = [col for col in services if col not in exp_df.columns]
    if missing_columns:
        raise KeyError(
            f"{dir_name}/cpu_utilization.csv lacks column(s) {missing_columns}; "
            f"available columns: {list(exp_df.columns)}"
        )

    # Filters CPU utilization features by using the experiment's time window.
    # NOTE(review): the row *index* is handed to the helper together with a
    # time format, so the index is presumably expected to hold timestamps
    # (e.g. the CSV has one more data column than header names) -- confirm.
    indices_outside_timewindow = [
        index
        for index in exp_df.index
        if lies_outside_timewindow(index, window_start, window_end, TIME_FORMAT)
    ]
    exp_df = exp_df.drop(indices_outside_timewindow)

    # Some readings can be incomplete; these are dropped.
    exp_df = exp_df.dropna()

    # Constructs average CPU utilization datapoint.
    data_point = [np.average(exp_df[col]) for col in services]
    n = len(exp_df)

    return data_point, n


def get_mubench_results(dir_name: str):
    """Summarizes the request latencies stored in ``<dir_name>/mubench_results.csv``.

    Args:
        dir_name: Results folder holding ``mubench_results.csv``.

    Returns:
        ``(avg_latency, n_avg_latency)``: the average of the "latency_ms"
        column and the number of entries in that column.
    """
    results = pd.read_csv(f"{dir_name}/mubench_results.csv", header=0)
    latencies = results["latency_ms"]
    return np.average(latencies), len(latencies)


SYNTH_MAX_STEP = 25
MAX_STEPS = 5
# Number of synthetic N_A steps that correspond to one experiment step.
STEP_RATIO = SYNTH_MAX_STEP // MAX_STEPS

# One entry per results folder. The rows are collected first and the DataFrame
# is built once afterwards; previously every iteration wrote to the same index
# label (the counter was never incremented), so only the last row survived.
rows = []

# Iterates through each of the results folders.
for folder in iterate_through_nested_folders(base_path, max_depth=4):
    # Meta data.
    # NOTE(review): `offload` is parsed but not used below -- confirm whether
    # the synthetic lookup should also depend on it.
    cpu_cap, reps, trials, run_idx, step, offload = get_experiment_params(folder)
    dashboard_intensity = step / MAX_STEPS
    start_time, end_time = get_experiment_start_end_time(folder)

    # Experimental results.
    utilization, n_utilization = get_utilization_data(folder)
    avg_latency, n_avg_latency = get_mubench_results(folder)

    # Theoretical results: the first synthetic row with the matching N_A.
    # NOTE(review): rows are selected on N_A only; if several rows share an
    # N_A value the first one wins -- confirm that this is the intended row.
    synth_data_point = synth_df[synth_df["N_A"] == SYNTH_MAX_STEP - step * STEP_RATIO]
    synth_utilization = synth_data_point[["Ugw", "Us1", "Us2", "Us3"]].values[0]
    synth_latency_ms = synth_data_point["R0"].values[0]

    # Value order must match `columns`.
    rows.append(
        [
            dashboard_intensity,
            run_idx,
            reps,
            trials,
            cpu_cap,
            start_time,
            end_time,
            avg_latency,
            synth_latency_ms,
            n_utilization,
            n_avg_latency,
            *utilization,
            *synth_utilization,
        ]
    )

# Index labels start at 1, matching the start value of the former row counter.
df = pd.DataFrame(rows, columns=columns, index=pd.RangeIndex(1, len(rows) + 1))

df

KeyError: 'gw'

# Request Delay

# CPU Utilization