In [None]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import itertools
import os

%matplotlib inline
%config InlineBackend.figure_format = 'png'

matplotlib.rcParams['pdf.fonttype'] = 42

In [None]:
# Parse output of end-to-end benchmarks
class MageMeasurement(object):
    """Timing data parsed from the log of one MAGE end-to-end benchmark run.

    Attributes (each is set only when the corresponding line appears in the log,
    except ``stats`` which always exists):
        stats: dict mapping a statistic name to the list of totals collected
            from every line whose second token is "(ns)".
        time_for_computation_ns: integer from the "Timer: <n> ns" line.
        time_for_computation_ms: the same value converted to milliseconds.
        total_time_ms: integer from the final "<n> ms" line.
    """

    def __init__(self, f):
        """Parse the iterable of lines ``f`` (typically an open log file).

        An input with no lines yields an object with an empty ``stats`` dict
        and no timing attributes.

        Raises:
            AssertionError: if the last line is not of the form "<int> ms".
        """
        # Must exist before the loop; previously the first stats line raised
        # AttributeError because this dict was never created.
        self.stats = {}
        last_line = None
        for line in f:
            tokens = line.split()
            if len(tokens) >= 2 and tokens[1] == "(ns)":
                stats = Stats(line)
                self.stats.setdefault(stats.name, []).append(stats.total)
            elif len(tokens) == 3 and tokens[0] == "Timer:" and tokens[2] == "ns":
                self.time_for_computation_ns = int(tokens[1])
                self.time_for_computation_ms = self.time_for_computation_ns / 1000000.0
            last_line = line
        if last_line is None:
            return

        tokens = last_line.split()
        if len(tokens) == 2 and tokens[1] == "ms":
            self.total_time_ms = int(tokens[0])
        else:
            # The original code carried an unreachable fallback here that reused the
            # computation time as the total ("hack to produce graphs with some
            # missing data"); it stays disabled and the file is rejected instead.
            raise AssertionError("last line is not '<total> ms': {0!r}".format(last_line))

class EMPMeasurement(object):
    """Timings parsed from a four-line EMP benchmark log.

    The log must be exactly: "connected", a line whose second token is the
    computation time, "PASS", and a line whose second token is the total time.
    Both times are stored as integers in ``time_for_computation_ms`` and
    ``total_time_ms``.
    """

    def __init__(self, f):
        """Parse the iterable of lines ``f``; AssertionError on an unexpected layout."""
        contents = list(f)
        assert(len(contents) == 4)
        banner, computation_line, verdict, total_line = contents
        assert(banner == "connected\n")
        assert(verdict == "PASS\n")
        self.time_for_computation_ms = int(computation_line.split()[1])
        self.total_time_ms = int(total_line.split()[1])
        
class PlanningMeasurement(object):
    """Per-phase planning times read from the sixth line of a six-line planning log.

    Tokens 3, 4 and 5 of that line become ``placement_ms``, ``replacement_ms``
    and ``scheduling_ms``; ``total_ms`` is their sum.
    """

    def __init__(self, f):
        """Parse the iterable of lines ``f``; AssertionError unless it has exactly six lines."""
        rows = [row for row in f]
        assert(len(rows) == 6)
        fields = rows[5].split()
        placement = int(fields[3])
        replacement = int(fields[4])
        scheduling = int(fields[5])
        self.placement_ms = placement
        self.replacement_ms = replacement
        self.scheduling_ms = scheduling
        self.total_ms = placement + replacement + scheduling
        
class PlanStats(object):
    """Memory and wall-clock figures extracted from colon-separated "key: value" report lines.

    ``mem_usage_kb`` comes from the "Maximum resident set size (kbytes)" line and
    ``wall_clock_s`` from the "Elapsed (wall clock) time (h:mm:ss or m:ss)" line.
    Either attribute is left unset when its line is absent.
    """

    def __init__(self, f):
        """Scan every line of the iterable ``f`` for the two recognised keys."""
        for raw in f:
            fields = raw.strip().split(":")
            key = fields[0]
            count = len(fields)
            if count == 2 and key == "Maximum resident set size (kbytes)":
                self.mem_usage_kb = int(fields[1].strip())
            elif key == "Elapsed (wall clock) time (h" and count in (6, 7):
                # The key itself contains colons, so an m:ss value gives 6 fields
                # and an h:mm:ss value gives 7.
                seconds = float(fields[-1]) + (60 * int(fields[-2]))
                if count == 7:
                    seconds = seconds + (60 * 60 * int(fields[-3]))
                self.wall_clock_s = seconds
                
        
class SEALMeasurement(object):
    """Total running time parsed from a single-line SEAL benchmark log."""

    def __init__(self, f):
        """Store the first token of the only line as integer ``total_time_ms``.

        AssertionError is raised unless ``f`` yields exactly one line.
        """
        contents = list(f)
        assert(len(contents) == 1)
        (only_line,) = contents
        self.total_time_ms = int(only_line.split()[0])

class Stats(object):
    """One parsed statistics line of the form
    "<name> (<unit>) . min . . avg . . max . . count . . sum . <total>".

    Attributes:
        name: the first whitespace-separated token.
        unit: the second token without its surrounding parentheses
            (e.g. "ns" for "(ns)").
        total: the integer in token 17, two tokens after the "sum" label.
    """

    def __init__(self, line):
        """Parse ``line``; AssertionError if the line is too short or a label is misplaced."""
        tokens = line.strip().split()
        # Reject short lines with AssertionError rather than IndexError, since
        # parse_log_directory skips files on AssertionError only.
        assert(len(tokens) >= 18)
        self.name = tokens[0]
        # The previous slice [1:-2] reduced "(ns)" to "n"; stripping the
        # punctuation handles both "(ns)" and "(ns):".
        self.unit = tokens[1].strip("():")
        assert(tokens[3] == "min")
        assert(tokens[6] == "avg")
        assert(tokens[9] == "max")
        assert(tokens[12] == "count")
        assert(tokens[15] == "sum")
        self.total = int(tokens[17])

In [None]:
def generate_filename(prefix, program, scenario, tag):
    """Build the log file name "<prefix>_<program>_<scenario>_<tag>.log".

    An integer ``tag`` n is rendered as "t<n>"; any other tag is used as given.
    """
    suffix = "t{0}".format(tag) if isinstance(tag, int) else tag
    return "{0}_{1}_{2}_{3}.log".format(prefix, program, scenario, suffix)

def parse_emp_measurement_file(filename):
    """Open ``filename`` and return its contents parsed as an EMPMeasurement."""
    with open(filename) as log:
        measurement = EMPMeasurement(log)
    return measurement

def parse_mage_measurement_file(filename):
    """Open ``filename`` and return its contents parsed as a MageMeasurement."""
    with open(filename) as log:
        measurement = MageMeasurement(log)
    return measurement
    
def parse_seal_measurement_file(filename):
    """Open ``filename`` and return its contents parsed as a SEALMeasurement."""
    with open(filename) as log:
        measurement = SEALMeasurement(log)
    return measurement
    
def parse_planning_measurement_file(filename):
    """Open ``filename`` and return its contents parsed as a PlanningMeasurement."""
    with open(filename) as log:
        measurement = PlanningMeasurement(log)
    return measurement
    
def parse_plan_stats_file(filename):
    """Open ``filename`` and return its contents parsed as a PlanStats."""
    with open(filename) as report:
        plan_stats = PlanStats(report)
    return plan_stats
    
def parse_plan_size_file(filename):
    """Return the integer stored in ``filename`` (surrounding whitespace ignored)."""
    with open(filename) as size_file:
        contents = size_file.read()
    return int(contents.strip())

    
# Name prefixes that parse_log_directory accepts for a machine directory whose
# name is not an integer machine ID.
allowable_locations = ("oregon", "iowa", "virginia")

def parse_log_directory(directory):
    """Parse every recognised benchmark file under ``directory/<machine>/``.

    ``<machine>`` must be an integer machine ID or a name starting with one of
    ``allowable_locations``; other entries are reported and skipped. Only files
    ending in .log, .planning, .planstats or .plansize are considered, and empty
    files are skipped.

    The file name (without extension) is split on "_" and must have at least six
    parts. Two layouts are understood:

    * WAN (first part "wan" or "pairedwan"):
      <kind>_<location>_<workers>_<ot depth>_<ot daemons>_<problem...>_<size>_<scenario>_<tag>_<worker>
      stored at logs[machine][ext][kind][location][workers][problem][size]
      [ot depth][ot daemons][scenario][worker][tag]
    * everything else:
      <experiment (two parts)>_<problem...>_<size>_<scenario>_<tag>
      stored at logs[machine][ext][experiment][problem][size][scenario][tag]

    The parser is chosen from the scenario ("os"/"unbounded"/"mage" dispatch on
    the extension; "emp" and "seal" have dedicated parsers). Files that fail a
    parser assertion are reported and skipped.

    Args:
        directory: path of the top-level log directory.

    Returns:
        The nested dict described above; empty if ``directory`` does not exist.
    """
    logs = {}
    if not os.path.isdir(directory):
        # The former `ls` capture did not raise on a missing directory; keep
        # that best-effort behaviour but say what happened.
        print("Skipping directory {0} (not found)".format(directory))
        return logs

    recognised_suffixes = (".log", ".planning", ".planstats", ".plansize")

    # os.listdir replaces the IPython `!ls {directory}` capture: no shell
    # interpolation of the path, and the function is plain Python. sorted()
    # keeps a deterministic order comparable to what `ls` produced.
    for mdir in sorted(os.listdir(directory)):
        try:
            machine_id = int(mdir)
        except ValueError:
            if mdir.startswith(tuple(allowable_locations)):
                machine_id = mdir
            else:
                # Previously joined the literal string "directory" by mistake.
                print("Skipping directory {0}".format(os.path.join(directory, mdir)))
                continue

        machine_logs = logs.setdefault(machine_id, {})

        machine_path = os.path.join(directory, mdir)
        if not os.path.isdir(machine_path):
            continue

        for log_file in sorted(os.listdir(machine_path)):
            if not log_file.endswith(recognised_suffixes):
                continue
            log_path = os.path.join(machine_path, log_file)

            if os.stat(log_path).st_size == 0:
                print("Skipping empty file {0}".format(log_path))
                continue

            # The suffix check above guarantees there is at least one ".".
            name, extension = log_file.rsplit(".", 1)

            ext_logs = machine_logs.setdefault(extension, {})

            tokens = name.split("_")
            if len(tokens) < 6:
                print("Skipping(1) file {0}".format(log_path))
                continue
            experiment = "_".join(tokens[:2])
            if tokens[0] == "wan" or tokens[0] == "pairedwan":
                location = tokens[1]
                try:
                    workers_per_node = int(tokens[2])
                    ot_pipeline_depth = int(tokens[3])
                    ot_num_daemons = int(tokens[4])
                    size = int(tokens[-4])
                except ValueError as ve:
                    print(ve)
                    print("Skipping(2) file {0}".format(log_path))
                    continue
                problem = "_".join(tokens[5:-4])
                scenario = tokens[-3]
                tag = tokens[-2]
                worker = tokens[-1]
                experiments = ext_logs.setdefault(tokens[0], {}).setdefault(location, {}).setdefault(workers_per_node, {}).setdefault(problem, {}).setdefault(size, {}).setdefault(ot_pipeline_depth, {}).setdefault(ot_num_daemons, {}).setdefault(scenario, {}).setdefault(worker, {})
            else:
                try:
                    size = int(tokens[-3])
                except ValueError:
                    print("Skipping file {0}".format(log_path))
                    continue
                problem = "_".join(tokens[2:-3])
                scenario = tokens[-2]
                tag = tokens[-1]
                experiments = ext_logs.setdefault(experiment, {}).setdefault(problem, {}).setdefault(size, {}).setdefault(scenario, {})
            if tag in experiments:
                print("Skipping {0} (duplicate for {1})".format(log_path, (experiment, problem, size, scenario, tag)))
                continue

            try:
                if scenario in ("os", "unbounded", "mage"):
                    # extension is always one of these four because of the
                    # suffix filter at the top of the loop.
                    if extension == "log":
                        parsed = parse_mage_measurement_file(log_path)
                    elif extension == "planning":
                        parsed = parse_planning_measurement_file(log_path)
                    elif extension == "planstats":
                        parsed = parse_plan_stats_file(log_path)
                    elif extension == "plansize":
                        parsed = parse_plan_size_file(log_path)
                elif scenario == "emp":
                    parsed = parse_emp_measurement_file(log_path)
                elif scenario == "seal":
                    parsed = parse_seal_measurement_file(log_path)
                else:
                    print("Skipping {0} (unknown scenario {1})".format(log_path, scenario))
                    continue
                experiments[tag] = parsed
            except AssertionError as ae:
                print("Skipping {0} (assertion failure: {1})".format(log_path, str(ae)))
    return logs

A Simple, Guided Example
======================

In [None]:
# Relative path of the log directory used by the guided example below.
simple_directory = "logs-workloads-2"

In [None]:
# Parse every recognised log under simple_directory into a nested dict.
simple_logs = parse_log_directory(simple_directory)

In [None]:
# Select machine 0, ".log" files, experiment "workers_1", problem "merge_sorted",
# size 1048576; what remains is indexed by scenario and then by trial tag.
simple_data = simple_logs[0]["log"]["workers_1"]["merge_sorted"][1048576]

# Total time of trial "t1" for each scenario, converted from milliseconds to seconds.
simple_unbounded_time = simple_data["unbounded"]["t1"].total_time_ms / 1000.0
simple_mage_time = simple_data["mage"]["t1"].total_time_ms / 1000.0
simple_os_time = simple_data["os"]["t1"].total_time_ms / 1000.0

In [None]:
# Bar chart comparing the three scenario times computed in the previous cell.
plt.figure(figsize = (4, 3))

# labels and values are parallel tuples: one bar per scenario.
labels = ("Unbounded", "MAGE 1GiB", "OS 1 GiB")
values = (simple_unbounded_time, simple_mage_time, simple_os_time)

plt.bar(labels, values)

plt.ylabel("Time (s)")

plt.show()

Baseline Experiments (Figures 6 and 7)
================================

In [None]:
# Relative path of the log directory for the baseline experiments.
baseline_directory = "logs-baseline"

In [None]:
# Nested dict of parsed baseline logs; read by the draw_*_baseline_graph functions.
baseline_logs = parse_log_directory(baseline_directory)

In [None]:
# Keyword arguments for plt.plot, one dict per scenario drawn in the baseline graphs.
# The EMP and SEAL styles differ only in their legend label.
baseline_unbounded_style = dict(label = "Unbounded", color = "blue", marker = "s", ls = "-", linewidth = 3)
baseline_mage_style = dict(label = "MAGE 1 GiB", color = "orange", marker = "^", ls = "--")
baseline_os_style = dict(label = "OS 1 GiB", color = "green", marker = "o", ls = "-")
baseline_emp_style = dict(label = "EMP 1 GiB", color = "red", marker = "v", ls = "-.")
baseline_seal_style = dict(label = "SEAL 1 GiB", color = "red", marker = "v", ls = "-.")

In [None]:
def draw_halfgates_baseline_graph(sizes):
    """Plot computation time against problem size for the halfgates merge_sorted baseline.

    Opens a new 3x2 figure and draws one line per scenario (unbounded, os, mage,
    emp) from trial "t1" of machine 0 in ``baseline_logs``, using each
    measurement's ``time_for_computation_ms`` converted to seconds. The caller
    is expected to add the legend and call ``plt.show()``.

    Args:
        sizes: sequence of problem sizes; every size must be present in the logs.
    """
    plt.figure(figsize = (3, 2))

    by_size = baseline_logs[0]["log"]["halfgates_baseline"]["merge_sorted"]

    def seconds_for(scenario):
        return [by_size[n][scenario]["t1"].time_for_computation_ms / 1000.0 for n in sizes]

    plt.plot(sizes, seconds_for("unbounded"), **baseline_unbounded_style)
    plt.plot(sizes, seconds_for("os"), **baseline_os_style)
    plt.plot(sizes, seconds_for("mage"), **baseline_mage_style)
    plt.plot(sizes, seconds_for("emp"), **baseline_emp_style)

    plt.xlim(0, sizes[-1])

    plt.xlabel("Problem Size (Records Per Party)")
    plt.ylabel("Time (s)")
    
def draw_ckks_baseline_graph(sizes):
    """Plot total time against problem size for the CKKS real_statistics baseline.

    Opens a new 3x2 figure and draws one line per scenario (unbounded, os, mage,
    seal) from trial "t1" of machine 0 in ``baseline_logs``, using each
    measurement's ``total_time_ms`` converted to seconds. The caller is expected
    to add the legend and call ``plt.show()``.

    Args:
        sizes: sequence of problem sizes; every size must be present in the logs.
    """
    plt.figure(figsize = (3, 2))

    by_size = baseline_logs[0]["log"]["ckks_baseline"]["real_statistics"]

    def seconds_for(scenario):
        return [by_size[n][scenario]["t1"].total_time_ms / 1000.0 for n in sizes]

    plt.plot(sizes, seconds_for("unbounded"), **baseline_unbounded_style)
    plt.plot(sizes, seconds_for("os"), **baseline_os_style)
    plt.plot(sizes, seconds_for("mage"), **baseline_mage_style)
    plt.plot(sizes, seconds_for("seal"), **baseline_seal_style)

    plt.xlim(0, sizes[-1])

    plt.xlabel("Problem Size (Number of Elements)")
    plt.ylabel("Time (s)")

In [None]:
# Halfgates baseline for sizes 2^13 through 2^19, y-axis limited to 0-200 s.
draw_halfgates_baseline_graph(tuple(2 ** i for i in range(13, 20)))
plt.ylim(0, 200)

# loc=2 is matplotlib's numeric code for the upper-left corner.
plt.legend(loc=2)
plt.show()

In [None]:
# Halfgates baseline for sizes 2^13 through 2^20, y-axis limited to 0-1000 s.
draw_halfgates_baseline_graph(tuple(2 ** i for i in range(13, 21)))
plt.ylim(0, 1000)

# Fixed x ticks printed as plain integers instead of scientific notation.
plt.xticks((0, 500000, 1000000))
plt.ticklabel_format(style = "plain")

# loc=2 is matplotlib's numeric code for the upper-left corner.
plt.legend(loc=2)
plt.show()

In [None]:
# CKKS baseline for sizes 2^8 through 2^12, y-axis limited to 0-10 s.
draw_ckks_baseline_graph(tuple(2 ** i for i in range(8, 13)))
plt.ylim(0, 10)

plt.legend()
plt.show()

In [None]:
# CKKS baseline for sizes 2^8 through 2^14, y-axis limited to 0-250 s.
draw_ckks_baseline_graph(tuple(2 ** i for i in range(8, 15)))
plt.ylim(0, 250)

plt.legend()
plt.show()

Single-Node Experiments
=====================

In [None]:
# Relative path of the log directory for the single-node experiments
# (the same directory the guided example uses).
single_node_directory = "logs-workloads-2"

In [None]:
# Nested dict of parsed single-node logs, keyed first by machine ID.
single_node_logs = parse_log_directory(single_node_directory)

In [None]:
# Bar geometry read by draw_workloads_plot (barwidth is also the offset between bars).
barwidth = 0.2
errbarsize = 3
# plt.bar keyword arguments in the order draw_workloads_plot expects:
# index 0 = unbounded, 1 = MAGE, 2 = OS.
styles = [
    dict(label = label, width = barwidth, capsize = errbarsize, color = color, hatch = hatch, edgecolor = "black")
    for label, color, hatch in (
        ("Unbounded", "blue", ""),
        ("MAGE 1 GiB", "white", ".."),
        ("OS 1 GiB", "white", "\\\\"),
    )
]

In [None]:
def smart_round(value):
    """Round ``value`` with a precision that shrinks as the magnitude grows.

    Below 0.1 keeps 3 decimals, below 1 keeps 2, below 1000 keeps 1; anything
    else is rounded to the nearest whole number and returned as an int.
    """
    for upper_bound, digits in ((0.1, 3), (1, 2), (1000, 1)):
        if value < upper_bound:
            return round(value, digits)
    return int(round(value, 0))
    
def smart_round_percent(value):
    """Round ``value`` like smart_round, but switch to whole numbers from 10 upward.

    Below 0.1 keeps 3 decimals, below 1 keeps 2, below 10 keeps 1; anything
    else is rounded to the nearest whole number and returned as an int.
    """
    for upper_bound, digits in ((0.1, 3), (1, 2), (10, 1)):
        if value < upper_bound:
            return round(value, digits)
    return int(round(value, 0))

In [None]:
# Based on the "autolabel" function here: https://matplotlib.org/3.1.3/gallery/lines_bars_and_markers/barchart.html
def autolabel(rects, errs, data, ove_imp = []):
    """Write each bar's median value, rotated 90 degrees, just above the bar.

    Args:
        rects: bar artists as returned by ``plt.bar``.
        errs: accepted for symmetry with the caller; not read here.
        data: per-bar sequences whose index 2 holds the value to print.
        ove_imp: per-bar (overhead, improvement) pairs for a longer label
            format; that format is currently switched off by the ``and False``
            below, so the argument has no visible effect.
    """
    for idx, bar in enumerate(rects):
        top = bar.get_height()
        if ove_imp and False:
            text = "{0} ({1}%, {2}x)".format(smart_round(data[idx][2]), smart_round_percent((ove_imp[idx][0] - 1) * 100), smart_round_percent(ove_imp[idx][1]))
        else:
            text = "{0}".format(smart_round(data[idx][2]))
        anchor = (bar.get_x() + bar.get_width() / 2, top)
        plt.annotate(text,
                    xy=anchor,
                    xytext=(-6, 3), # offset (to make room for error bar)
                    textcoords = "offset points", rotation = 90,
                    ha = "center", va = "bottom")
        
def draw_workloads_plot(experiments, graph_data, experiment_display_names, force_autolabel = False):
    """Draw grouped bars of unbounded / MAGE / OS median times, normalised by the unbounded median.

    Args:
        experiments: sequence of experiment keys into ``graph_data``.
        graph_data: ``graph_data[e][scenario]`` is a 5-element sequence of the
            (0, 25, 50, 75, 100) percentiles; index 2 is the median.
        experiment_display_names: x tick labels, parallel to ``experiments``.
        force_autolabel: when true, annotate every bar via ``autolabel``.

    Bars are drawn on the current figure; error bars span the 25th to 75th
    percentile, divided by the same unbounded median as the bar heights.
    """
    positions = np.arange(0, len(experiments))

    def normalised(scenario):
        # Returns (bar heights, [lower errors, upper errors]) for one scenario.
        heights, lower, upper = [], [], []
        for e in experiments:
            quartiles = graph_data[e][scenario]
            baseline = graph_data[e]["unbounded"][2]
            lower.append((quartiles[2] - quartiles[1]) / baseline)
            upper.append((quartiles[3] - quartiles[2]) / baseline)
            heights.append(quartiles[2] / baseline)
        return heights, [lower, upper]

    unbounded_heights, unbounded_err = normalised("unbounded")
    unbounded_bars = plt.bar(positions - barwidth, unbounded_heights, yerr = unbounded_err, **styles[0])
    mage_heights, mage_err = normalised("mage")
    mage_bars = plt.bar(positions, mage_heights, yerr = mage_err, **styles[1])
    os_heights, os_err = normalised("os")
    os_bars = plt.bar(positions + barwidth, os_heights, yerr = os_err, **styles[2])

    if force_autolabel:
        # (MAGE / unbounded, OS / MAGE) median ratios, passed along for the MAGE bars only.
        ratios = [(graph_data[e]["mage"][2] / graph_data[e]["unbounded"][2], graph_data[e]["os"][2] / graph_data[e]["mage"][2]) for e in experiments]
        autolabel(unbounded_bars, unbounded_err, [graph_data[e]["unbounded"] for e in experiments])
        autolabel(mage_bars, mage_err, [graph_data[e]["mage"] for e in experiments], ratios)
        autolabel(os_bars, os_err, [graph_data[e]["os"] for e in experiments])

    plt.xticks(positions, experiment_display_names)
    plt.ylabel("Time (Normalized\nby Unbounded)")

In [None]:
# (problem name, problem size) pairs for the single-node workloads.
hg_experiments = (
    ("merge_sorted", 1048576),
    ("full_sort", 1048576),
    ("loop_join", 2048),
    ("matrix_vector_multiply", 8192),
    ("binary_fc_layer", 16384),
)
ckks_experiments = (
    ("real_sum", 65536),
    ("real_statistics", 16384),
    ("real_matrix_vector_multiply", 256),
    ("real_naive_matrix_multiply", 128),
    ("real_tiled_16_matrix_multiply", 128),
)

experiments = hg_experiments + ckks_experiments
# Tick labels, parallel to `experiments`: short name on the first line, size on the second.
experiment_display_names = (
    "merge\nn = 1048576",
    "sort\nn = 1048576",
    "ljoin\nn = 2048",
    "mvmul\nn = 8192",
    "binfclayer\nn = 16384",
    "rsum\nn = 65536",
    "rstats\nn = 16384",
    "rmvmul\nn = 256",
    "n_rmatmul\nn = 128",
    "t_rmatmul\nn = 128",
)

In [None]:
# Single-node workloads: print one " & "-separated summary row per experiment
# and draw the normalised bar chart.
plt.figure(figsize = (15, 2))

num_machines = 1
# graph_data[experiment][scenario] -> (min, 25th, median, 75th, max) of total time in seconds.
graph_data = {}
for i, e in enumerate(experiments):
    graph_data[e] = {}
    print(experiment_display_names[i].split("\n")[0], end = "")
    for scenario in ("unbounded", "os", "mage"):
        # Pool every trial from every machine for this experiment and scenario.
        total_times = []
        for machine_id in range(num_machines):
            benchmark_data = single_node_logs[machine_id]["log"]["workers_1"]
            total_times.extend(m.total_time_ms / 1000.0 for tag, m in benchmark_data[e[0]][e[1]][scenario].items())
        assert len(total_times) > 0
        graph_data[e][scenario] = np.percentile(total_times, (0, 25, 50, 75, 100))
        print(" &", round(np.median(total_times), 1), end="")
    # Last two columns: OS median / MAGE median, and MAGE overhead over unbounded in percent.
    print(" &", round(graph_data[e]["os"][2] / graph_data[e]["mage"][2], 1), end="")
    print(" &", round((graph_data[e]["mage"][2] / graph_data[e]["unbounded"][2] - 1) * 100, 0), end="\\%")
    print()

draw_workloads_plot(experiments, graph_data, experiment_display_names)

# This limit makes the scale match the one in the figure in the paper;
# comment it out to let matplotlib choose the scale.
plt.ylim(0, 15)
# plt.xlim(-0.5, 10.5)

plt.legend()
plt.show()

In [None]:
# TODO: Support multiple trials done in parallel across different pairs of machines
# Prints one "<name> & <median planning time in seconds>" row per experiment,
# using the MAGE planning logs of machine 0 only.
planning_data = single_node_logs[0]["planning"]["workers_1"]
# Kept separate from graph_data: this cell used to reset graph_data[e] (discarding
# the execution-time percentiles of the plotting cell) and indexed it with a
# `scenario` variable that only existed as a leftover of that cell's loop.
planning_graph_data = {}
for i, e in enumerate(experiments):
    print(experiment_display_names[i].split("\n")[0], end = "")
    planning_times = tuple(m.total_ms / 1000.0 for tag, m in planning_data[e[0]][e[1]]["mage"].items())
    assert len(planning_times) > 0
    planning_graph_data[e] = {"mage": np.percentile(planning_times, (0, 25, 50, 75, 100))}
    median = np.median(planning_times)
    # Keep more decimals for small values so short planning times do not print as 0.0.
    if median < 0.1:
        rounded_median = round(median, 3)
    elif median < 1:
        rounded_median = round(median, 2)
    else:
        rounded_median = round(median, 1)
    print(" &", rounded_median, end="\n")

p = 4 Parallelism Experiments
========================

In [None]:
# Relative path of the log directory for the p = 4 parallelism experiments.
multi_node_directory = "logs-workloads-8"

In [None]:
# Nested dict of parsed multi-node logs, keyed first by machine ID.
multi_node_logs = parse_log_directory(multi_node_directory)

In [None]:
# (problem name, problem size) pairs for the four-worker runs.
hg_experiments_4 = (
    ("merge_sorted", 4194304),
    ("full_sort", 4194304),
    ("loop_join", 4096),
    ("matrix_vector_multiply", 16384),
    ("binary_fc_layer", 32768),
)
ckks_experiments_4 = (
    ("real_sum", 262144),
    ("real_statistics", 65536),
    ("real_matrix_vector_multiply", 512),
    ("real_naive_matrix_multiply", 256),
    ("real_tiled_16_matrix_multiply", 256),
)

experiments_4 = hg_experiments_4 + ckks_experiments_4
# Tick labels, parallel to experiments_4. Three labels previously disagreed with
# the sizes actually looked up above: merge and sort said 4194384 (the size is
# 4194304) and rstats said 32768 (the size is 65536).
experiment_4_display_names = (
    "merge\nn = 4194304",
    "sort\nn = 4194304",
    "ljoin\nn = 4096",
    "mvmul\nn = 16384",
    "binfclayer\nn = 32768",
    "rsum\nn = 262144",
    "rstats\nn = 65536",
    "rmvmul\nn = 512",
    "n_rmatmul\nn = 256",
    "t_rmatmul\nn = 256",
)

In [None]:
# p = 4 workloads: print one " & "-separated summary row per experiment and draw
# the normalised bar chart. A trial's time is the slowest of its four machines.
plt.figure(figsize = (15, 2))

# TODO: Support multiple trials done in parallel across different pairs of machines
# graph_data[experiment][scenario] -> (min, 25th, median, 75th, max) of trial time in seconds.
graph_data = {}
for i, e in enumerate(experiments_4):
    graph_data[e] = {}
    print(experiment_4_display_names[i].split("\n")[0], end = "")
    for scenario in ("unbounded", "os", "mage"):
        # Group the measurements of machines 0-3 by trial tag.
        instances_per_tag = {}
        for machine in (0, 1, 2, 3):
            machine_trials = multi_node_logs[machine]["log"]["workers_4"][e[0]][e[1]][scenario]
            for tag, m in machine_trials.items():
                instances_per_tag.setdefault(tag, []).append(m)
                
        total_times = []
        for tag, machine_exps in instances_per_tag.items():
            # Every trial must have a measurement from each of the four machines.
            assert len(machine_exps) == 4
            total_times.append(max(m.total_time_ms / 1000.0 for m in machine_exps))
        assert len(total_times) > 0
        graph_data[e][scenario] = np.percentile(total_times, (0, 25, 50, 75, 100))
        print(" &", round(np.median(total_times), 1), end="")
    # Last two columns: OS median / MAGE median, and MAGE overhead over unbounded in percent.
    print(" &", round(graph_data[e]["os"][2] / graph_data[e]["mage"][2], 1), end="")
    print(" &", round((graph_data[e]["mage"][2] / graph_data[e]["unbounded"][2] - 1) * 100, 0), end="\\%")
    print()

draw_workloads_plot(experiments_4, graph_data, experiment_4_display_names)

# This limit makes the scale match the one in the figure in the paper;
# comment it out to let matplotlib choose the scale.
plt.ylim(0, 20)
# plt.xlim(-0.5, 10.5)

plt.legend()
plt.show()

WAN Experiments: Number of Connections
===================================

In [None]:
# Relative path of the log directory for the WAN number-of-connections experiments.
wan_conn_directory = "logs-wan-conn"

In [None]:
# Nested dict of parsed WAN connection-count logs, keyed first by machine ID.
wan_conn_logs = parse_log_directory(wan_conn_directory)

In [None]:
# Line styles for the WAN plots, one per remote location plus the local reference line.
wan_oregon_style = dict(label = "us-west1", color = "blue", ls = "-")
wan_iowa_style = dict(label = "us-central1", color = "orange", ls = "-.")
wan_virginia_style = dict(label = "us-east4", color = "red", ls = "-.")
wan_lan_style = dict(label = "Local (US West 2)", color = "green", ls = "--")
# Cap size of the error bars drawn by plt.errorbar in the WAN plots.
errbarsize = 3

In [None]:
# Execution time of merge_sorted (size 1048576) against workers per node for the
# oregon and iowa logs, with a flat reference line for the local setup.
plt.figure(figsize = (3, 2))

num_workers = (1, 2, 4)
# exec_times_by_loc[location] -> one (min, 25th, median, 75th, max) array per entry of num_workers.
exec_times_by_loc = {}
for location in ("oregon", "iowa"):
    # NOTE(review): "or True" makes this branch unconditional, so the iowa
    # settings in the elif below are never used - confirm this is intended.
    if location == "oregon" or True:
        ot_concurrency = 128
        ot_num_connections = 1
    elif location == "iowa":
        ot_concurrency = 256
        ot_num_connections = 1
    exec_times = []
    exec_times_by_loc[location] = exec_times
    for num_workers_per_node in num_workers:
        benchmark_data = wan_conn_logs[0]["log"]["wan"][location][num_workers_per_node]["merge_sorted"][1048576]
        # The total OT concurrency is split evenly over connections and workers.
        ot_pipeline_depth = ot_concurrency // (ot_num_connections * num_workers_per_node)
        data = benchmark_data[ot_pipeline_depth][ot_num_connections]["mage"]

        total_times = []
        # Walk trial tags t1, t2, ... until the first one missing for worker w0.
        for tag in ("t{0}".format(i) for i in itertools.count(start = 1)):
            if tag not in data["w0"]:
                break
            measurements = []
            for worker_id in ("w{0}".format(i) for i in range(num_workers_per_node)):
                measurement = data[worker_id][tag]
                measurements.append(measurement.total_time_ms / 1000.0)
            # A trial takes as long as its slowest worker.
            total_times.append(np.max(measurements))
        exec_times.append(np.percentile(total_times, (0, 25, 50, 75, 100)))

# Error bars run from the 25th to the 75th percentile around the median.
error_bars = ([stats[2] - stats[1] for stats in exec_times_by_loc["oregon"]], [stats[3] - stats[2] for stats in exec_times_by_loc["oregon"]])
plt.errorbar(num_workers, tuple(stats[2] for stats in exec_times_by_loc["oregon"]), yerr = error_bars, capsize = errbarsize, **wan_oregon_style)
error_bars = ([stats[2] - stats[1] for stats in exec_times_by_loc["iowa"]], [stats[3] - stats[2] for stats in exec_times_by_loc["iowa"]])
plt.errorbar(num_workers, tuple(stats[2] for stats in exec_times_by_loc["iowa"]), yerr = error_bars, capsize = errbarsize, **wan_iowa_style)

# NOTE(review): 174.5 is a hard-coded constant for the local reference line;
# its source is not visible in this notebook - presumably a measured time in seconds.
plt.plot(num_workers, [174.5 for _ in num_workers], **wan_lan_style)

plt.legend()

plt.xlabel("Number of workers")
plt.ylabel("Time (s)")

plt.ylim(0, 1200)

plt.show()

WAN Experiments: OT Parallelism
============================

In [None]:
# Relative path of the log directory for the WAN OT-parallelism experiments.
wan_ot_directory = "logs-wan-ot"

In [None]:
# Nested dict of parsed WAN OT-parallelism logs, keyed first by machine ID.
wan_ot_logs = parse_log_directory(wan_ot_directory)

In [None]:
# Execution time of merge_sorted (size 1048576, one worker per node, oregon)
# against OT concurrency. Uses errbarsize and wan_oregon_style from the
# "Number of Connections" section above.
plt.figure(figsize = (3, 2))
    
benchmark_data = wan_ot_logs[0]["log"]["wan"]["oregon"][1]["merge_sorted"][1048576]

ot_concurrencies = (2, 4, 8, 16, 32, 64, 128, 256)
# One (min, 25th, median, 75th, max) array per entry of ot_concurrencies.
exec_times = []
for ot_concurrency in ot_concurrencies:
    ot_num_connections = 2
    # The concurrency is split evenly over the two connections.
    ot_pipeline_depth = ot_concurrency // ot_num_connections
    data = benchmark_data[ot_pipeline_depth][ot_num_connections]["mage"]
    
    total_times = []
    # Walk trial tags t1, t2, ... until the first one missing for worker w0.
    for tag in ("t{0}".format(i) for i in itertools.count(start = 1)):
        if tag not in data["w0"]:
            break
        measurement = data["w0"][tag]
        total_times.append(measurement.total_time_ms / 1000.0)
    exec_times.append(np.percentile(total_times, (0, 25, 50, 75, 100)))

# Error bars run from the 25th to the 75th percentile around the median.
error_bars = ([stats[2] - stats[1] for stats in exec_times], [stats[3] - stats[2] for stats in exec_times])
plt.errorbar(ot_concurrencies, tuple(stats[2] for stats in exec_times), yerr = error_bars, capsize = errbarsize, **wan_oregon_style)

plt.legend()

plt.xlabel("OT Concurrency")
plt.ylabel("Time (s)")

plt.show()

16 GiB Experiments
================

In [None]:
# Relative path of the log directory for the 16 GiB experiments.
large_exp_directory = "logs-16-16gb"

In [None]:
# Nested dict of parsed 16 GiB logs, keyed first by machine ID.
large_exp_logs = parse_log_directory(large_exp_directory)

In [None]:
# Rebinds barwidth, errbarsize and styles for the 16 GiB plots; draw_workloads_plot
# reads these globals, so cells run after this one use the "16 GiB" legend labels.
barwidth = 0.2
errbarsize = 3
# Index 0 = unbounded, 1 = MAGE, 2 = OS.
styles = [
    dict(label = label, width = barwidth, capsize = errbarsize, color = color, hatch = hatch, edgecolor = "black")
    for label, color, hatch in (
        ("Unbounded", "blue", ""),
        ("MAGE 16 GiB", "white", ".."),
        ("OS 16 GiB", "white", "\\\\"),
    )
]

In [None]:
# (problem name, problem size) pairs for the 16 GiB runs. These rebind the names
# used by the single-node section, so re-running that section afterwards picks
# up these values.
hg_experiments = (
    ("merge_sorted", 8388608),
    ("full_sort", 8388608),
    ("loop_join", 3840),
    ("matrix_vector_multiply", 20480),
    ("binary_fc_layer", 57344),
)
# NOTE(review): the planning-data section of this notebook names the last
# problem "real_tiled_64_matrix_multiply" for the same size (224), while this
# tuple says "real_tiled_matrix_multiply" - confirm which name the logs use.
ckks_experiments = (
    ("real_sum", 458752),
    ("real_statistics", 147456),
    ("real_matrix_vector_multiply", 448),
    ("real_naive_matrix_multiply", 256),
    ("real_tiled_matrix_multiply", 224),
)

experiments = hg_experiments + ckks_experiments
# Tick labels, parallel to `experiments`.
experiment_display_names = (
    "merge\nn = 8388608",
    "sort\nn = 8388608",
    "ljoin\nn = 3840",
    "mvmul\nn = 20480",
    "binfclayer\nn = 57344",
    "rsum\nn = 458752",
    "rstats\nn = 147456",
    "rmvmul\nn = 448",
    "n_rmatmul\nn = 256",
    "t_rmatmul\nn = 224",
)

In [None]:
# 16 GiB workloads: print one " & "-separated summary row per experiment and
# draw the normalised bar chart, pooling the trials of eight machines.
plt.figure(figsize = (15, 2))

num_machines = 8
# graph_data[experiment][scenario] -> (min, 25th, median, 75th, max) of total time in seconds.
graph_data = {}
for i, e in enumerate(experiments):
    graph_data[e] = {}
    print(experiment_display_names[i].split("\n")[0], end = "")
    for scenario in ("unbounded", "os", "mage"):
        # Pool every trial from every machine for this experiment and scenario.
        total_times = []
        for machine_id in range(num_machines):
            benchmark_data = large_exp_logs[machine_id]["log"]["workers_1"]
            total_times.extend(m.total_time_ms / 1000.0 for tag, m in benchmark_data[e[0]][e[1]][scenario].items())
        assert len(total_times) > 0
        graph_data[e][scenario] = np.percentile(total_times, (0, 25, 50, 75, 100))
        print(" &", round(np.median(total_times), 1), end="")
    # Last two columns: OS median / MAGE median, and MAGE overhead over unbounded in percent.
    print(" &", round(graph_data[e]["os"][2] / graph_data[e]["mage"][2], 1), end="")
    print(" &", round((graph_data[e]["mage"][2] / graph_data[e]["unbounded"][2] - 1) * 100, 0), end="\\%")
    print()

draw_workloads_plot(experiments, graph_data, experiment_display_names)

plt.ylim(0, 25)

# loc = 1 is matplotlib's numeric code for the upper-right corner.
plt.legend(loc = 1)
plt.show()

Planning Data
============

In [None]:
# Parsed planning logs for the two problem-size sets compared in the table below.
small_planning = parse_log_directory("logs-workloads-2-planning")
large_planning = parse_log_directory("logs-16gib-workloads-2-planning")

In [None]:
# Three parallel tuples: row label, then the (problem name, problem size) pair
# looked up in small_planning and in large_planning for that row.
generic_names = (
    "merge",
    "sort",
    "ljoin",
    "mvmul",
    "binfclayer",
    "rsum",
    "rstats",
    "rmvmul",
    "n_rmatmul",
    "t_rmatmul",
)
small_experiments = (
    ("merge_sorted", 1048576),
    ("full_sort", 1048576),
    ("loop_join", 2048),
    ("matrix_vector_multiply", 8192),
    ("binary_fc_layer", 16384),
    ("real_sum", 65536),
    ("real_statistics", 16384),
    ("real_matrix_vector_multiply", 256),
    ("real_naive_matrix_multiply", 128),
    ("real_tiled_16_matrix_multiply", 128),
)
large_experiments = (
    ("merge_sorted", 8388608),
    ("full_sort", 8388608),
    ("loop_join", 3840),
    ("matrix_vector_multiply", 20480),
    ("binary_fc_layer", 57344),
    ("real_sum", 458752),
    ("real_statistics", 147456),
    ("real_matrix_vector_multiply", 448),
    ("real_naive_matrix_multiply", 256),
    ("real_tiled_64_matrix_multiply", 224),
)

In [None]:
# Print one "name & small time & small size & large time & large size" row per
# problem: medians over eight machines of the planner's wall-clock seconds and
# its maximum resident set size converted from KiB to MiB (trial "t1" only).
num_machines = 8
for i, name in enumerate(generic_names):
    small_list = []
    large_list = []
    for machine_id in range(num_machines):
        e = small_experiments[i]
        small = small_planning[machine_id]["planstats"]["workers_1"][e[0]][e[1]]["mage"]["t1"]
        small_list.append(small)
        
        e = large_experiments[i]
        large = large_planning[machine_id]["planstats"]["workers_1"][e[0]][e[1]]["mage"]["t1"]
        large_list.append(large)
        
    small_time = np.median([m.wall_clock_s for m in small_list])
    small_size = np.median([m.mem_usage_kb / 1024.0 for m in small_list])
    large_time = np.median([m.wall_clock_s for m in large_list])
    large_size = np.median([m.mem_usage_kb / 1024.0 for m in large_list])
    print("{0} & {1} & {2} & {3} & {4}".format(name, smart_round(small_time), smart_round(small_size), smart_round(large_time), smart_round(large_size)))

Password Reuse Query
===================

In [None]:
# Relative path of the log directory for the password-reuse query experiments.
password_directory = "logs-password-4"

In [None]:
# Nested dict of parsed password-reuse logs, keyed first by machine ID.
password_logs = parse_log_directory(password_directory)

In [None]:
# Line styles for the application plots (password reuse and computational PIR).
application_unbounded_style = dict(label = "Unbounded", color = "blue", marker = "s", ls = "-", linewidth = 3)
application_mage_style = dict(label = "MAGE with all available RAM", color = "orange", marker = "^", ls = "--")
application_os_style = dict(label = "OS with all available RAM", color = "green", marker = "o", ls = "-")

In [None]:
# Problem name as it appears in the password-reuse log file names.
problem_name = "password"
# Problem sizes: the powers of two from 2^20 through 2^27.
mage_sizes = tuple(2 ** exponent for exponent in range(20, 28))

In [None]:
# Execution time (hours) of the password-reuse query against problem size for
# the MAGE and OS scenarios. A trial's time is that of its slowest worker and
# each plotted point is the median over trials.
plt.figure(figsize = (8, 2))

# by_scenario[scenario][size][tag][worker] -> measurement
by_scenario = {}
for scenario in ("mage", "os"):
    by_size = {}
    by_scenario[scenario] = by_size
    for size in mage_sizes:
        if size == mage_sizes[-1] and scenario == "os":
            continue # We didn't collect this data point since it would take too long
        by_trial = {}
        by_size[size] = by_trial
        for machine in (0, 1, 2, 3):
            machine_trials = password_logs[machine]["log"]["pairedwan"]["oregon"][2][problem_name][size][64][1][scenario]
            # The loop variable used to be called `experiments`, which overwrote
            # the module-level `experiments` tuple that other cells read.
            # NOTE(review): measurements are keyed by worker name only, so equal
            # worker names on different machines overwrite each other -
            # presumably worker names are unique across machines; confirm.
            for worker, worker_trials in machine_trials.items():
                for tag, m in worker_trials.items():
                    by_trial.setdefault(tag, {})[worker] = m


graph_data = {}
# Sizes that actually contributed a point, so x and y always have equal length.
plotted_sizes = {}
for scenario in ("mage", "os"):
    times = []
    sizes_with_data = []
    graph_data[scenario] = times
    plotted_sizes[scenario] = sizes_with_data
    by_size = by_scenario[scenario]
    for size in mage_sizes:
        if size not in by_size:
            print("No data for size {0} for scenario \"{1}\"".format(size, scenario))
            continue
        size_data = by_size[size]
        trial_measurements = []
        for tag, worker_data in size_data.items():
            worker_measurements = []
            for worker, m in worker_data.items():
                # milliseconds -> seconds -> hours
                worker_measurements.append((m.total_time_ms / 1000.0) / 3600)
            trial_measurements.append(np.max(worker_measurements))
        print(trial_measurements)
        sizes_with_data.append(size)
        times.append(np.median(trial_measurements))


plt.xlabel("Number of Users/Passwords Per Party")
plt.ylabel("Execution Time (hours)")

plt.plot(plotted_sizes["mage"], graph_data["mage"], **application_mage_style)
plt.plot(plotted_sizes["os"], graph_data["os"], **application_os_style)
plt.legend()

plt.show()

Computational PIR
================

In [None]:
# Relative path of the log directory for the computational PIR experiments.
cpir_directory = "logs-cpir"

In [None]:
# Nested dict of parsed computational PIR logs, keyed first by machine ID.
cpir_logs = parse_log_directory(cpir_directory)

In [None]:
# Problem name and sizes used as keys into cpir_logs.
cpir_name = "real_cpir"
# NOTE(review): every other size is a multiple of 128, which would make the
# eighth entry 1152 rather than 1158 - confirm against the log file names.
cpir_sizes = (256, 384, 512, 640, 768, 896, 1024, 1158, 1280, 1408, 1536)

In [None]:
# graph_data[scenario] -> one median computation time in minutes per entry of
# cpir_sizes, pooling every trial of machines 0-3.
graph_data = {}
for scenario in ("mage", "os"):
    times = []
    graph_data[scenario] = times
    for size in cpir_sizes:
        trials = []
        for machine_id in range(4):
            data = cpir_logs[machine_id]["log"]["workers_1"][cpir_name][size][scenario]
            for tag, measurement in data.items():
                # milliseconds -> seconds -> minutes
                trials.append((measurement.time_for_computation_ms / 1000.0) / 60.0)
        times.append(np.median(trials))

In [None]:
# Computational PIR: median time from the previous cell against n * n for each
# size n in cpir_sizes, y-axis limited to 0-80 minutes.
plt.figure(figsize = (8, 2))

plt.plot(tuple(n * n for n in cpir_sizes), graph_data["mage"], **application_mage_style)
plt.plot(tuple(n * n for n in cpir_sizes), graph_data["os"], **application_os_style)

plt.legend()

plt.ylim(0, 80)

plt.xlabel("Number of Batches (4096 Real Numbers Per Batch)")
plt.ylabel("Exec. Time (minutes)")

plt.show()