In [141]:
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import matplotlib
%matplotlib inline

sns.set(rc={'figure.figsize':(6,3)}, font_scale=1.0, style='whitegrid', font='CMU Sans Serif')
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
plt.rc('axes', unicode_minus=False)

def df_grouped_order_from_data(data):
    df = pd.DataFrame.from_dict(data)
    # Get the smallest Start timestamp and remove it from every timestamp value
    min_start_timestamp = df["startTimestamp"].min()
    df["startTimestamp"] = df["startTimestamp"] - min_start_timestamp
    df["endTimestamp"] = df["endTimestamp"] - min_start_timestamp
    df["duration"] =  df["endTimestamp"] - df["startTimestamp"]
    df["fusionGroup"] = "(" + df["fusionGroup"].str.replace(",", ")-(", regex=False) + ")"
    df["fusionGroup"] = df["fusionGroup"].str.replace(".", ",", regex=False)

    # Now do not count every invocation, but sort the df by the cost of trace
    grouped = df.groupby('traceId').agg({'billedDuration': ['sum'], 'startTimestamp': ['min'], 'endTimestamp': ['max'], 'fusionGroup': 'min'})
    #grouped['numInvocations'] = df.groupby('traceId').count()
    grouped['numInvocations'] = df.groupby('traceId').size()
    print(f'Total Number of Invocations should be: {grouped["numInvocations"].sum()} == {len(df.index)}')
    # Grouped: Get Root Invocation of TraceId and get rootEndTimestamp
    # Pandas join() oder merge() machen mit dem alten Dataframe. Merge ist einfacher
    rootInvocations = df[df["isRootInvocation"]][["traceId", "endTimestamp"]].rename(columns={"endTimestamp": "rootEndTimestamp"})
    grouped = pd.merge(grouped, rootInvocations, how="left", on="traceId")
    grouped["rootDuration"] = grouped["rootEndTimestamp"] - grouped["startTimestamp", "min"]

    fusion_groups_order = pd.unique(grouped.sort_values(by="rootEndTimestamp")['fusionGroup', 'min'])
    return (df,grouped,fusion_groups_order)

In [142]:
def save_fig(ax, name, folder, file_type="pdf"):
    fig = ax.get_figure()
    fig.tight_layout()
    file_name = name + "." + file_type
    fig.savefig(os.path.join(folder, file_name), bbox_inches='tight')
    plt.close()

def save_plots(df, grouped, fusion_groups_order, billed_min_max, rr_min_max, folder, legend_font=None, legend=True, ncols=3, bbox=(0.2, 1.1, 1.8, 0.5)):
    plot, axes = (0,0)
    if legend:
        plot, axes = plt.subplots(1, 2, figsize=(12,4))
    else:
        plot, axes = plt.subplots(1, 2, figsize=(12,3))

    axes[1].set_xlim(billed_min_max)
    axes[1].set_xlabel("Billed Duration [ms]")
    axes[1].set_ylabel("Cumulative Distribution")
    axes[0].set_xlim(rr_min_max)
    axes[0].set_xlabel("Request Response Latency [ms]")
    axes[0].set_ylabel("Cumulative Distribution")

    ax = sns.ecdfplot(ax=axes[1], data=grouped, x=grouped['billedDuration', 'sum'], hue=grouped['fusionGroup', 'min'], hue_order=fusion_groups_order, legend=None)

    #plot.legend(loc="upper right", bbox_to_anchor=(0.5, -1.25), ncol=3, title="test123")
    #handles, labels = ax.get_legend_handles_labels()
    #ax.legend().set_visible(True)
    #plot.legend(handles=handles, labels=labels, loc="upper right", bbox_to_anchor=(0.5, -1.25), ncol=3, title="test123")
    #plot.legend(handles, labels, loc="upper center")
    #ax.get_legend().remove()
    #ax.get_legend().set_bbox_to_anchor(())
    #ax.set(xlim=billed_min_max)

    ax = sns.ecdfplot(ax=axes[0], data=grouped, x=grouped['rootDuration'], hue=grouped['fusionGroup', 'min'], hue_order=fusion_groups_order)

    if legend:
        ax.get_legend().set_title("Fusion Setup")
        sns.move_legend(ax, loc='lower left', bbox_to_anchor=bbox, ncol=ncols, mode="expand", borderaxespad=0, fontsize=legend_font) # fontsize
    else:
        ax.legend().set_visible(False)
    #plt.xlabel("request response latency [ms]")
    #plt.ylabel("Cumulative Distribution")
    #ax.get_legend().set_title("Fusion Group")
    #ax.set(xlim=rr_min_max)
    #save_fig(ax, "ecdfRR", folder)
    #plt.legend(title="Fusion Group", labels=fusion_groups_order, ncol=3,loc=9, bbox_to_anchor=(0.5, 1.25, 1, 1))
    #plot.tight_layout()
    #axes[0].get_legend().remove()
    #axes[1].get_legend().remove()
    #plot.legend(loc="upper right", bbox_to_anchor=(0.5, -1.25), ncol=3, title="test123")
    #plt.subplots_adjust(wspace=0.1,hspace=0.1)
    save_fig(plot, "all", folder)

In [143]:
from scipy.stats import t
def printMeanAndCi(x, grouped, name=""):
    m = x.mean() 
    s = x.std() 
    dof = len(grouped)-1 
    confidence = 0.95

    t_crit = np.abs(t.ppf((1-confidence)/2,dof))
    print(f'Mean: {m:7.2f}, Confidence Interval: ({m-s*t_crit/np.sqrt(len(x)):7.2f} / {m+s*t_crit/np.sqrt(len(x)):7.2f}), Median: {x.median()} for {name}')

def printStats(grouped, fusion_groups_order):
    print("overall")
    printMeanAndCi(grouped["rootDuration"], grouped, "rootDuration")
    printMeanAndCi(grouped['billedDuration', 'sum'], grouped, "billedDuration")
    for group in fusion_groups_order:
        filtered = grouped[grouped['fusionGroup', 'min'] == group]
        print(group)
        printMeanAndCi(filtered["rootDuration"], grouped, f'rootDuration')
        printMeanAndCi(filtered['billedDuration', 'sum'], grouped, f'billedDuration')

In [144]:
import json
import os

final_tests = [
    {
        "folder": "results/finalTests/IoT-coldStart-latency-7x300",
        "billed_mm": (7000,20000),
        "rr_mm": (1700,4500),
        "legend": False,
        "ncols": 3,
        "bbox": (0.2, 1.1, 1.8, 0.5)
    },
    {
        "folder": "results/finalTests/IoT-normal-latency-7x1000",
        "billed_mm": (600,3500),
        "rr_mm": (110,500),
        "legend": True,
        "ncols": 3,
        "bbox": (0, 1.1, 2.19, 0.5),
        "legend_font": 9
    },
    {
        "folder": "results/finalTests/split-coldStart-latency-6x300",
        "billed_mm": (6500,28000),
        "rr_mm": (3200,10000),
        "legend": False,
        "ncols": 2,
        "bbox": (0.63, 1.1, 0.95, 0.5)
    },
    {
        "folder": "results/finalTests/split-normal-latency-5x1000",
        "billed_mm": (0,12000),
        "rr_mm": (2900,3400),
        "legend": True,
        "ncols": 2,
        "bbox": (0.63, 1.1, 0.95, 0.5)
    } 
]

for test in final_tests:
    results_dir = test["folder"]
    json_files = [pos_json for pos_json in os.listdir(results_dir) if pos_json.endswith('.json')]
    data = []
    for index, json_fn in enumerate(json_files):
        with open(os.path.join(results_dir, json_fn)) as in_file:
            fusion_group = json_fn.removesuffix('.json')
            content = json.load(in_file)
            print(f'Read fusion group {fusion_group} with {len(content)} lines')
            data.extend(content)
    
    (df,grouped,order) = df_grouped_order_from_data(data)
    if "legend_font" in test:
        save_plots(df,grouped,order, test["billed_mm"], test["rr_mm"], results_dir, legend=test["legend"], ncols=test["ncols"], bbox=test["bbox"], legend_font=test["legend_font"])
    else:
        save_plots(df,grouped,order, test["billed_mm"], test["rr_mm"], results_dir, legend=test["legend"], ncols=test["ncols"], bbox=test["bbox"])
    

Read fusion group AS,CA.DJ,CS.CSA.CSL,CT,CW,I,SE with 2398 lines
Read fusion group AS,CA.DJ,CS,CSA,CSL,CT,CW,I,SE with 2994 lines
Read fusion group AS,CA.DJ,CS.CSA.CSL,CT,CW.I.SE with 3595 lines
Read fusion group AS,CA.DJ,CS.CSA,CSL,CT,CW,I,SE with 2697 lines
Read fusion group AS,CA.DJ,CS.CSA.CSL,CT,CW.SE,I with 2400 lines
Read fusion group AS,CA,CS,CSA,CSL,CT,CW,DJ,I,SE with 3291 lines
Total Number of Invocations should be: 17375 == 17375


  grouped = pd.merge(grouped, rootInvocations, how="left", on="traceId")
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.


Read fusion group AS,CA.DJ,CS.CSA.CSL,CT,CW,I,SE with 7676 lines
Read fusion group AS,CA.DJ,CS,CSA,CSL,CT,CW,I,SE with 9543 lines
Read fusion group AS,CA.DJ,CS.CSA.CSL,CT,CW.I.SE with 11577 lines
Read fusion group AS,CA.DJ,CS.CSA,CSL,CT,CW,I,SE with 8665 lines
Read fusion group AS,CA.DJ,CS.CSA.CSL,CT,CW.SE,I with 7701 lines
Read fusion group AS,CA,CS,CSA,CSL,CT,CW,DJ,I,SE with 10479 lines
Total Number of Invocations should be: 55641 == 55641


  grouped = pd.merge(grouped, rootInvocations, how="left", on="traceId")


Read fusion group A.D.E,B,C,F,G with 1799 lines
Read fusion group A,B,C,D,E,F,G with 2050 lines
Read fusion group A.B.D.E,C,F,G with 3594 lines
Read fusion group A.E,B,C,D,F,G with 2100 lines
Total Number of Invocations should be: 9543 == 9543


  grouped = pd.merge(grouped, rootInvocations, how="left", on="traceId")
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.


Read fusion group A.D.E,B,C,F,G with 5891 lines
Read fusion group A,B,C,D,E,F,G with 6891 lines
Read fusion group A.B.D.E,C,F,G with 7824 lines
Read fusion group A.E,B,C,D,F,G with 6844 lines
Total Number of Invocations should be: 27450 == 27450


  grouped = pd.merge(grouped, rootInvocations, how="left", on="traceId")
