In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import matplotlib
%matplotlib inline

sns.set(rc={'figure.figsize':(6,3)}, font_scale=1.0, style='whitegrid', font='CMU Sans Serif')
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
plt.rc('axes', unicode_minus=False)

def df_grouped_order_from_data(data):
    new_fs_names = {
        "(AS)-(CA)-(CS)-(CSA)-(CSL)-(CT)-(CW)-(DJ)-(I)-(SE)": "IoT$_0$",
        "(AS)-(CA,DJ)-(CS)-(CSA)-(CSL)-(CT)-(CW)-(I)-(SE)": "IoT$_1$",
        "(AS)-(CA,DJ)-(CS,CSA)-(CSL)-(CT)-(CW)-(I)-(SE)": "IoT$_2$",
        "(AS)-(CA,DJ)-(CS,CSA,CSL)-(CT)-(CW)-(I)-(SE)": "IoT$_3$",
        "(AS)-(CA,DJ)-(CS,CSA,CSL)-(CT)-(CW,SE)-(I)": "IoT$_4$",
        "(AS)-(CA,DJ)-(CS,CSA,CSL)-(CT)-(CW,I,SE)": "IoT$_5$",
        "(A)-(B)-(C)-(D)-(E)-(F)-(G)": "Tree$_0$",
        "(A,E)-(B)-(C)-(D)-(F)-(G)": "Tree$_1$",
        "(A,D,E)-(B)-(C)-(F)-(G)": "Tree$_2$",
        "(A,B,D,E)-(C)-(F)-(G)": "Tree$_3$",
    }

    df = pd.DataFrame.from_dict(data)

    # Get the smallest Start timestamp and remove it from every timestamp value
    min_start_timestamp = df["startTimestamp"].min()
    df["startTimestamp"] = df["startTimestamp"] - min_start_timestamp
    df["endTimestamp"] = df["endTimestamp"] - min_start_timestamp
    df["duration"] =  df["endTimestamp"] - df["startTimestamp"]
    df["fusionGroup"] = "(" + df["fusionGroup"].str.replace(",", ")-(", regex=False) + ")"
    df["fusionGroup"] = df["fusionGroup"].str.replace(".", ",", regex=False)
    df["fusionGroup"] = df["fusionGroup"].map(new_fs_names)

    # Now do not count every invocation, but sort the df by the cost of trace
    grouped = df.groupby('traceId').agg({'billedDuration': ['sum'], 'startTimestamp': ['min'], 'endTimestamp': ['max'], 'fusionGroup': 'min'})
    #grouped['numInvocations'] = df.groupby('traceId').count()
    grouped['numInvocations'] = df.groupby('traceId').size()
    print(f'Total Number of Invocations should be: {grouped["numInvocations"].sum()} == {len(df.index)}')
    # Grouped: Get Root Invocation of TraceId and get rootEndTimestamp
    # Pandas join() oder merge() machen mit dem alten Dataframe. Merge ist einfacher
    rootInvocations = df[df["isRootInvocation"]][["traceId", "endTimestamp"]].rename(columns={"endTimestamp": "rootEndTimestamp"})
    grouped = pd.merge(grouped, rootInvocations, how="left", on="traceId")
    grouped["rootDuration"] = grouped["rootEndTimestamp"] - grouped["startTimestamp", "min"]

    fusion_groups_order = pd.unique(grouped.sort_values(by="rootEndTimestamp")['fusionGroup', 'min'])
    return (df,grouped,fusion_groups_order)

In [2]:
print(sns.color_palette().as_hex())


['#4c72b0', '#dd8452', '#55a868', '#c44e52', '#8172b3', '#937860', '#da8bc3', '#8c8c8c', '#ccb974', '#64b5cd']


In [3]:
def save_fig(ax, name, folder, file_type="pdf"):
    fig = ax.get_figure()
    fig.tight_layout()
    file_name = name + "." + file_type
    fig.savefig(os.path.join(folder, file_name), bbox_inches='tight')
    plt.close()

def save_plots(grouped, fusion_groups_order, billed_min_max, rr_min_max, cmap, ncols, bbox=None, legend_font=None, legend=True):

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12,3))

    ax1.set_xlim(rr_min_max)
    ax1.set_xlabel("Request Response Latency [ms]")
    ax1.set_ylabel("Cumulative Distribution")
    ax2.set_xlim(billed_min_max)
    ax2.set_xlabel("Billed Duration [ms]")
    ax2.set_ylabel("Cumulative Distribution")

    sns.ecdfplot(ax=ax1, data=grouped, x=grouped['rootDuration'], hue=grouped['fusionGroup', 'min'], hue_order=fusion_groups_order, legend=None,palette=cmap)

    sns.ecdfplot(ax=ax2, data=grouped, x=grouped['billedDuration', 'sum'], hue=grouped['fusionGroup', 'min'], hue_order=fusion_groups_order, legend=None, palette=cmap)

    if legend:
        fig.legend(handles=reversed(ax1.get_lines()), labels=list(order), ncol=ncols, loc="upper center", title="Fusion Group Setup", fontsize=legend_font, bbox_to_anchor=bbox)
        print(ax1.get_lines())

    return fig

In [4]:
from scipy.stats import t
def printMeanAndCi(x, grouped, name=""):
    m = x.mean() 
    s = x.std() 
    dof = len(grouped)-1 
    confidence = 0.95

    t_crit = np.abs(t.ppf((1-confidence)/2,dof))
    print(f'Mean: {m:7.2f}, Confidence Interval: ({m-s*t_crit/np.sqrt(len(x)):7.2f} / {m+s*t_crit/np.sqrt(len(x)):7.2f}), Median: {x.median()} for {name}')

def printStats(grouped, fusion_groups_order):
    print("overall")
    printMeanAndCi(grouped["rootDuration"], grouped, "rootDuration")
    printMeanAndCi(grouped['billedDuration', 'sum'], grouped, "billedDuration")
    for group in fusion_groups_order:
        filtered = grouped[grouped['fusionGroup', 'min'] == group]
        print(group)
        printMeanAndCi(filtered["rootDuration"], grouped, f'rootDuration')
        printMeanAndCi(filtered['billedDuration', 'sum'], grouped, f'billedDuration')

In [5]:
# custom color maps because brown is ugly af
cmap = sns.color_palette(as_cmap=True)
orig_cmap = cmap[:4]
mod_cmap = [cmap[0], cmap[5], cmap[2], cmap[3], cmap[4], cmap[1]]

In [6]:
import json
import os

final_tests = [
    {
        "folder": "results/finalTests/IoT-coldStart-latency-7x300",
        "billed_mm": (7000,20000),
        "rr_mm": (1700,4500),
        "legend": False,
        "ncols": 6,
        "bbox": None,
        "cmap": mod_cmap,
    },
    {
        "folder": "results/finalTests/IoT-normal-latency-7x1000",
        "billed_mm": (600,3500),
        "rr_mm": (110,500),
        "legend": True,
        "ncols": 6,
        "bbox": (0, 0.66, 1, 0.5),
        "cmap": mod_cmap,
    },
    {
        "folder": "results/finalTests/split-coldStart-latency-6x300",
        "billed_mm": (6500,28000),
        "rr_mm": (3200,10000),
        "legend": False,
        "ncols": 4,
        "bbox": None,
        "cmap": orig_cmap,
    },
    {
        "folder": "results/finalTests/split-normal-latency-5x1000",
        "billed_mm": (0,12000),
        "rr_mm": (2900,3400),
        "legend": True,
        "ncols": 4,
        "bbox": (0, 0.66, 1, 0.5),
        "cmap": orig_cmap,
    }
]

for test in final_tests:
    results_dir = test["folder"]
    json_files = [pos_json for pos_json in os.listdir(results_dir) if pos_json.endswith('.json')]
    data = []
    for index, json_fn in enumerate(json_files):
        with open(os.path.join(results_dir, json_fn)) as in_file:
            fusion_group = json_fn.removesuffix('.json')
            content = json.load(in_file)
            print(f'Read fusion group {fusion_group} with {len(content)} lines')
            data.extend(content)

    (df,grouped,order) = df_grouped_order_from_data(data)

    if "legend_font" in test:
        p = save_plots(grouped=grouped, fusion_groups_order=order, billed_min_max=test["billed_mm"], rr_min_max=test["rr_mm"], legend=test["legend"], ncols=test["ncols"], bbox=test["bbox"], legend_font=test["legend_font"], cmap=test["cmap"])
        p.show()
        save_fig(p, "all", results_dir)
    else:
        p = save_plots(grouped,order, test["billed_mm"], test["rr_mm"], legend=test["legend"], ncols=test["ncols"], bbox=test["bbox"], cmap=test["cmap"])
        p.show()
        save_fig(p, "all", results_dir)


Read fusion group AS,CA.DJ,CS.CSA.CSL,CT,CW.I.SE with 3595 lines
Read fusion group AS,CA.DJ,CS,CSA,CSL,CT,CW,I,SE with 2994 lines
Read fusion group AS,CA.DJ,CS.CSA.CSL,CT,CW.SE,I with 2400 lines
Read fusion group AS,CA.DJ,CS.CSA,CSL,CT,CW,I,SE with 2697 lines
Read fusion group AS,CA.DJ,CS.CSA.CSL,CT,CW,I,SE with 2398 lines
Read fusion group AS,CA,CS,CSA,CSL,CT,CW,DJ,I,SE with 3291 lines
Total Number of Invocations should be: 17375 == 17375


  grouped = pd.merge(grouped, rootInvocations, how="left", on="traceId")
  p.show()
webf NOT subset; don't know how to subset; dropped


Read fusion group AS,CA.DJ,CS.CSA.CSL,CT,CW.I.SE with 11577 lines
Read fusion group AS,CA.DJ,CS,CSA,CSL,CT,CW,I,SE with 9543 lines
Read fusion group AS,CA.DJ,CS.CSA.CSL,CT,CW.SE,I with 7701 lines
Read fusion group AS,CA.DJ,CS.CSA,CSL,CT,CW,I,SE with 8665 lines
Read fusion group AS,CA.DJ,CS.CSA.CSL,CT,CW,I,SE with 7676 lines
Read fusion group AS,CA,CS,CSA,CSL,CT,CW,DJ,I,SE with 10479 lines
Total Number of Invocations should be: 55641 == 55641
<a list of 6 Line2D objects>


  grouped = pd.merge(grouped, rootInvocations, how="left", on="traceId")
  p.show()
webf NOT subset; don't know how to subset; dropped
  grouped = pd.merge(grouped, rootInvocations, how="left", on="traceId")
  p.show()


Read fusion group A.E,B,C,D,F,G with 2100 lines
Read fusion group A.D.E,B,C,F,G with 1799 lines
Read fusion group A.B.D.E,C,F,G with 3594 lines
Read fusion group A,B,C,D,E,F,G with 2050 lines
Total Number of Invocations should be: 9543 == 9543


webf NOT subset; don't know how to subset; dropped


Read fusion group A.E,B,C,D,F,G with 6844 lines
Read fusion group A.D.E,B,C,F,G with 5891 lines
Read fusion group A.B.D.E,C,F,G with 7824 lines
Read fusion group A,B,C,D,E,F,G with 6891 lines


  grouped = pd.merge(grouped, rootInvocations, how="left", on="traceId")
  p.show()
webf NOT subset; don't know how to subset; dropped


Total Number of Invocations should be: 27450 == 27450
<a list of 4 Line2D objects>
