# Curiefense performance report
## Set measurement folder

In [None]:
import os
import json
import glob
import statistics

import pandas as pd
from collections import defaultdict
import os
import matplotlib.pyplot as plt

# Folder the measurements are read from. The RESULTS_DIR environment variable
# overrides the default; trailing slashes are dropped.
DEFAULT_RESULTS_DIR = "./results"
RESULTS_DIR = os.getenv("RESULTS_DIR", DEFAULT_RESULTS_DIR).rstrip("/")
print(f"Performance measurements will be read from {RESULTS_DIR}")

## Sample data
### Sample fortio measurement

In [None]:
# Load one fortio result file to show what a raw measurement looks like.
# The `with` block closes the file handle once the JSON has been parsed.
with open(os.path.join(RESULTS_DIR, "with_cf/fortio-30-50-250.json")) as sample_file:
    j = json.load(sample_file)
j

### Sample jaeger measurement
Two *spans* are present for each query: one for service `istio-ingressgateway`, one for service `ratings.bookinfo`. Spans that belong to the same query share the same `traceID` attribute. The span for `ratings.bookinfo` has a non-empty `references` attribute, and its duration is shorter than that of the span for `istio-ingressgateway`.

In [None]:
# Load one jaeger result file to show what a raw trace dump looks like.
# The `with` block closes the file handle once the JSON has been parsed.
with open(os.path.join(RESULTS_DIR, "with_cf/jaeger-30-50-250.json")) as sample_file:
    j = json.load(sample_file)
j

## Create a pandas dataframe from jaeger & fortio outputs

In [None]:
# (column name, percentile) pairs copied from fortio's "Percentiles" list.
_FORTIO_PERCENTILES = (
    ("P50", 50),
    ("P75", 75),
    ("P90", 90),
    ("P99", 99),
    ("P99.9", 99.9),
)

# (column name, n) pairs: the last of the n-quantile cut points is the
# percentile stored in the column (n=2 -> median, n=100 -> 99th percentile).
_JAEGER_QUANTILES = (
    ("JIstioTimeP50", 2),
    ("JIstioTimeP75", 4),
    ("JIstioTimeP90", 10),
    ("JIstioTimeP99", 100),
)

# Every jaeger-derived column, in the order it appears in the dataframe.
_JAEGER_COLUMNS = (
    "JIstioTimeAvg",
    *(label for label, _ in _JAEGER_QUANTILES),
    "JIstioTimeMin",
    "JIstioTimeMax",
)


def _load_json(path):
    """Parse the JSON document stored at *path*, closing the file afterwards."""
    with open(path) as handle:
        return json.load(handle)


def _run_key(path):
    """Return the last three dash-separated fields of the file name.

    ``fortio-30-50-250.json`` and ``jaeger-30-50-250.json`` both yield
    ``30-50-250.json``, which is how the two files of one run are paired.
    Only the base name is split so dashes in directory names cannot leak in.
    """
    return "-".join(os.path.basename(path).split("-")[-3:])


def _fortio_row(report):
    """Extract the fortio columns from one parsed fortio report."""
    histogram = report["DurationHistogram"]
    percentiles = histogram["Percentiles"]
    row = {
        "Connections": report["NumThreads"],
        "RequestedQPS": int(report["RequestedQPS"]),
        "SizeMin": report["Sizes"]["Min"],
        "SizeMax": report["Sizes"]["Max"],
        "ActualQPS": report["ActualQPS"],
    }
    for label, wanted in _FORTIO_PERCENTILES:
        # First matching entry wins; a missing percentile raises IndexError.
        row[label] = [p["Value"] for p in percentiles if p["Percentile"] == wanted][0]
    row["TimeMin"] = histogram["Min"]
    row["TimeMax"] = histogram["Max"]
    row["TimeAvg"] = histogram["Avg"]
    row["TimeStdDev"] = histogram["StdDev"]
    return row


def _istio_overheads(traces):
    """Return, for each complete trace, root-span minus child-span duration.

    A span without references fills the first slot of its trace, any other
    span fills the second. Traces missing either slot are ignored.
    """
    per_trace = defaultdict(lambda: [None, None])
    for trace in traces["data"]:
        for span in trace["spans"]:
            # Durations are divided by 1e6; presumably microseconds -> seconds.
            seconds = float(span["duration"]) / 1e6
            slot = 0 if len(span["references"]) == 0 else 1
            per_trace[span["traceID"]][slot] = seconds
    return [
        root - child
        for root, child in per_trace.values()
        if root is not None and child is not None
    ]


def _last_cut_point(values, n):
    """Return the last of the *n*-quantile cut points of a non-empty list."""
    if len(values) < 2:
        # statistics.quantiles rejects a single data point before Python 3.13;
        # a lone sample is its own percentile.
        return values[0]
    return statistics.quantiles(values, n=n)[-1]


def _overhead_stats(overheads):
    """Summarise the per-trace overheads into the JIstioTime* columns.

    With no complete trace every column is NaN, so one empty jaeger file does
    not abort the whole report and the dataframe keeps its columns.
    """
    if not overheads:
        return dict.fromkeys(_JAEGER_COLUMNS, float("nan"))
    stats = {"JIstioTimeAvg": statistics.fmean(overheads)}
    for label, n in _JAEGER_QUANTILES:
        stats[label] = _last_cut_point(overheads, n)
    stats["JIstioTimeMin"] = min(overheads)
    stats["JIstioTimeMax"] = max(overheads)
    return stats


def folder2pd(folder, results_dir=None):
    """Build a dataframe with one row per run found in *folder*.

    Reads every ``fortio-*.json`` and ``jaeger-*.json`` file of the folder and
    merges the two files of a run (same trailing ``a-b-c.json`` name) into a
    single row.

    Args:
        folder: sub-folder of the results directory to read.
        results_dir: directory containing *folder*; defaults to the
            module-level ``RESULTS_DIR``.

    Returns:
        A ``pandas.DataFrame`` holding the fortio columns (Connections,
        RequestedQPS, SizeMin, SizeMax, ActualQPS, P50..P99.9, Time*) and the
        jaeger columns (JIstioTime*). It is empty when no file matches.

    Raises:
        IndexError: a fortio report lacks one of the expected percentiles.
        KeyError: a report lacks one of the expected fields.
    """
    if results_dir is None:
        results_dir = RESULTS_DIR
    rows = defaultdict(dict)
    for path in glob.glob(os.path.join(results_dir, folder, "fortio-*.json")):
        rows[_run_key(path)].update(_fortio_row(_load_json(path)))
    for path in glob.glob(os.path.join(results_dir, folder, "jaeger-*.json")):
        overheads = _istio_overheads(_load_json(path))
        rows[_run_key(path)].update(_overhead_stats(overheads))
    return pd.DataFrame(list(rows.values()))

In [None]:
# Load each measured configuration into its own dataframe, keyed by the name
# of its results sub-folder.
folders = ["with_cf", "without_cf"]
data = dict(zip(folders, map(folder2pd, folders)))
# Show the "without_cf" dataframe as the cell output.
data["without_cf"]

## Draw QPS vs time percentile
As measured from fortio

In [None]:
def qps_vs(cnx, col, args, ylim=None):
    """Plot column *col* against ActualQPS, one line per dataframe in *args*.

    The lines are drawn on matplotlib's current axes.

    Args:
        cnx: value of the "Connections" column selecting the rows to plot.
        col: name of the dataframe column drawn on the y axis.
        args: mapping of legend label -> dataframe; may be empty.
        ylim: optional y-axis limits passed to ``plt.ylim``; skipped when falsy.
    """
    labels = list(args)
    for frame in args.values():
        # Sort by QPS so the line is drawn left to right.
        selected = frame[frame["Connections"] == cnx].sort_values(by=["ActualQPS"])
        plt.plot(selected["ActualQPS"], selected[col])
    if labels:
        # Lines were plotted in mapping order, so labels line up with them.
        plt.legend(labels)
    plt.xlabel("Actual QPS")
    plt.ylabel(f"{col} (s)")
    if ylim:
        plt.ylim(ylim)
    plt.title(f"connections {cnx} -- ActualQPS vs {col}")

def big_plot(*args, **kargs):
    """Open a 20x20 inch, 72 dpi figure and draw a ``qps_vs`` plot in it.

    All positional and keyword arguments are forwarded to ``qps_vs``.
    """
    plt.figure(figsize=(20, 20), dpi=72)
    qps_vs(*args, **kargs)

In [None]:
# P90 latency at 250 connections, y axis limited to the 0-1 range.
qps_vs(cnx=250, col="P90", args=data, ylim=(0, 1))

In [None]:
# Large standalone figure: P90 latency at 500 connections.
big_plot(cnx=500, col="P90", args=data)

In [None]:
def plot_all(args, all_cols=("P50", "P90", "P99"), ylim=None, all_cnx=(10, 70, 125, 250)):
    """Draw a grid of ``qps_vs`` plots: one row per connection count, one column per metric.

    Args:
        args: mapping of legend label -> dataframe, forwarded to ``qps_vs``.
        all_cols: dataframe columns to plot, one grid column each.
        ylim: optional y-axis limits applied to every subplot.
        all_cnx: connection counts to plot, one grid row each.
    """
    fig = plt.figure(figsize=(16, 8), dpi=100)
    # NOTE(review): top=2 puts the top of the subplot area at twice the figure
    # height; presumably this relies on the notebook rendering to show it all.
    plt.subplots_adjust(top=2)

    n_rows, n_cols = len(all_cnx), len(all_cols)
    for row, cnx in enumerate(all_cnx):
        for column, col in enumerate(all_cols):
            # Subplot indices are 1-based and run row by row.
            fig.add_subplot(n_rows, n_cols, row * n_cols + column + 1)
            qps_vs(cnx, col, args, ylim=ylim)

In [None]:
# Grid of the default fortio percentiles, y axis limited to the 0-8 range.
plot_all(args=data, ylim=(0, 8))

## Draw QPS vs latency introduced by istio
As measured from jaeger

In [None]:
# Same grid for the jaeger-derived columns.
jaeger_cols = ["JIstioTimeP50", "JIstioTimeP90", "JIstioTimeP99"]
plot_all(data, all_cols=jaeger_cols)