In [2]:
import matplotlib.pyplot as plt
import pandas as pd

# Earlier input location, built from `d` and `p` (kept for reference):
# filename = "./max-tree-max-half-weight-data/max_tree_size_accuracy_circuit_level/" + f"{d}_{p}.txt"
filename = "./temp.txt"

# Skip the first line of the file, treat no row as a header, and split
# columns on single spaces.
df = pd.read_csv(
    filename,
    sep=' ',
    skiprows=[0],
    header=None,
)
display(df)

Unnamed: 0,0
0,"{""defect_num"":11,""error_num"":0,""events"":{""deco..."
1,"{""defect_num"":21,""error_num"":0,""events"":{""deco..."
2,"{""defect_num"":14,""error_num"":0,""events"":{""deco..."
3,"{""defect_num"":9,""error_num"":0,""events"":{""decod..."
4,"{""defect_num"":7,""error_num"":0,""events"":{""decod..."
...,...
995,"{""defect_num"":15,""error_num"":0,""events"":{""deco..."
996,"{""defect_num"":10,""error_num"":0,""events"":{""deco..."
997,"{""defect_num"":14,""error_num"":0,""events"":{""deco..."
998,"{""defect_num"":19,""error_num"":0,""events"":{""deco..."


In [11]:
import json
import subprocess
import os
import sys
import tempfile
import math
import scipy


class Profile:
    """
    Benchmark profile read from a text file that holds one JSON document per line.

    Layout of the file as consumed by `__init__`:
      - line 0: parsed as JSON and then ignored (building a `PartitionConfig`
        from it is currently disabled, so `partition_config` stays `None`)
      - line 1: stored unchanged in `benchmark_config`
      - every following line: one profile entry, collected in `entries`
    Reading stops at the first blank line.

    If `skip_begin_profiles` is provided, that many entries are skipped at the
    beginning; it defaults to 20 because usually the first few profiles are not
    stable yet.
    """

    def __init__(self, filename, skip_begin_profiles=20):
        assert isinstance(filename, str)
        self.partition_config = None
        # defined up front so that a file with fewer than two lines yields
        # `None` here instead of an AttributeError on first access
        self.benchmark_config = None
        self.entries = []
        skipped = 0
        with open(filename, "r", encoding="utf8") as f:
            for line_idx, line in enumerate(f):
                line = line.strip("\r\n ")
                if line == "":
                    break
                # line 0 is still parsed, so malformed JSON there keeps raising
                value = json.loads(line)
                if line_idx == 0:
                    # self.partition_config = PartitionConfig.from_json(value)
                    continue
                if line_idx == 1:
                    self.benchmark_config = value
                elif skipped < skip_begin_profiles:
                    skipped += 1
                else:
                    self.entries.append(value)

    def __repr__(self):
        return f"Profile {{ partition_config: {self.partition_config}, entries: [...{len(self.entries)}] }}"

    def sum_decoding_time(self):
        """Sum of `entry["events"]["decoded"]` over all kept entries."""
        return sum(entry["events"]["decoded"] for entry in self.entries)

    def decoding_time_relative_dev(self):
        """
        Population standard deviation of the per-entry decoding time divided by
        its mean. Raises ZeroDivisionError when there are no entries.
        """
        avr_decoding_time = self.average_decoding_time()
        dev_sum = sum((entry["events"]["decoded"] - avr_decoding_time) ** 2
                      for entry in self.entries)
        return math.sqrt(dev_sum / len(self.entries)) / avr_decoding_time

    def average_decoding_time(self):
        """Mean decoding time per entry. Raises ZeroDivisionError when there are no entries."""
        return self.sum_decoding_time() / len(self.entries)

    def sum_defect_num(self):
        """Sum of `entry["defect_num"]` over all kept entries."""
        return sum(entry["defect_num"] for entry in self.entries)

    def average_decoding_time_per_defect(self):
        """Total decoding time divided by the total number of defects."""
        return self.sum_decoding_time() / self.sum_defect_num()

    def sum_computation_cpu_seconds(self):
        """
        Sum of `end - start` over every element of
        `entry["solver_profile"]["primal"]["event_time_vec"]`, over all kept entries.
        """
        return sum(
            sum(event_time["end"] - event_time["start"]
                for event_time in entry["solver_profile"]["primal"]["event_time_vec"])
            for entry in self.entries
        )

    def average_computation_cpu_seconds(self):
        """`sum_computation_cpu_seconds()` per entry. Raises ZeroDivisionError when there are no entries."""
        return self.sum_computation_cpu_seconds() / len(self.entries)

    def sum_job_time(self, unit_index):
        """Sum of `end - start` of the `event_time_vec` element at `unit_index`, over all kept entries."""
        total_job_time = 0
        for entry in self.entries:
            event_time = entry["solver_profile"]["primal"]["event_time_vec"][unit_index]
            total_job_time += event_time["end"] - event_time["start"]
        return total_job_time

    def average_job_time(self, unit_index):
        """`sum_job_time(unit_index)` per entry. Raises ZeroDivisionError when there are no entries."""
        return self.sum_job_time(unit_index) / len(self.entries)


class VertexRange:
    """Pair of bounds `(start, end)`, stored as a tuple in `self.range`."""

    def __init__(self, start, end):
        bounds = (start, end)
        self.range = bounds

    def __repr__(self):
        # rendered as "[start, end]"
        return "[{}, {}]".format(self.range[0], self.range[1])

    def length(self):
        """Second bound minus first bound."""
        lower = self.range[0]
        upper = self.range[1]
        return upper - lower


class PartitionConfig:
    """
    Partitions of a vertex set plus the pairwise fusions that merge them.

    Units are indexed with the partitions first (in list order), followed by one
    unit per fusion (in list order). `parents[i]` is the index of the fusion
    unit that absorbs unit `i`, or `None` when unit `i` has no parent.
    """

    def __init__(self, vertex_num):
        # a fresh config is a single partition spanning (0, vertex_num) with no fusions
        self.vertex_num = vertex_num
        self.partitions = [VertexRange(0, vertex_num)]
        self.fusions = []
        self.parents = [None]

    def __repr__(self):
        return (
            f"PartitionConfig {{ vertex_num: {self.vertex_num}, "
            f"partitions: {self.partitions}, fusions: {self.fusions} }}"
        )

    @staticmethod
    def from_json(value):
        """
        Build a config from a mapping with the keys `vertex_num`, `partitions`
        (sequence of 2-element ranges) and `fusions` (sequence of unit-index pairs).

        Asserts that there is exactly one fusion fewer than partitions, that each
        fusion only refers to units with a smaller index which have no parent yet,
        and that afterwards only the last unit is left without a parent.
        """
        config = PartitionConfig(value['vertex_num'])
        config.partitions = [VertexRange(bounds[0], bounds[1])
                             for bounds in value['partitions']]
        config.fusions = [(pair[0], pair[1]) for pair in value['fusions']]

        leaf_count = len(config.partitions)
        assert leaf_count == len(config.fusions) + 1
        unit_count = 2 * leaf_count - 1

        # build parent references
        parents = [None] * unit_count
        for offset, children in enumerate(config.fusions):
            unit_index = leaf_count + offset
            # validate both children completely before linking either of them
            assert all(child < unit_index for child in children)
            assert all(parents[child] is None for child in children)
            for child in children:
                parents[child] = unit_index

        # every unit but the last one must have been fused into something
        assert all(parent is not None for parent in parents[:-1])
        assert parents[-1] is None
        config.parents = parents
        return config

    def unit_depth(self, unit_index):
        """Number of parent links followed from `unit_index` until a unit without parent is reached."""
        depth = 0
        parent = self.parents[unit_index]
        while parent is not None:
            depth += 1
            parent = self.parents[parent]
        return depth


# git_root_dir = subprocess.run("git rev-parse --show-toplevel", cwd=os.path.dirname(os.path.abspath(
#     __file__)), shell=True, check=True, capture_output=True).stdout.decode(sys.stdout.encoding).strip(" \r\n")
# rust_dir = git_root_dir


# Calculate Fusion Time

In [15]:
# ---- inputs ----
benchmark_profile_path = "./temp1.txt"
partition_num = 2
delta_T = 50
d = 5

profile = Profile(benchmark_profile_path)
# Disabled check of the partition height:
# config = profile.partition_config
# for i in range(partition_num):  # check partition is indeed delta_T height
#     assert config.partitions[i].length() == (d+1) * (d+1) // 2 * delta_T

# ---- per-entry duration of the last element of event_time_vec ----
fusion_time_vec = []
for entry in profile.entries:
    event_time_vec = entry["solver_profile"]["primal"]["event_time_vec"]
    # each entry must hold exactly 2 * partition_num - 1 timed units
    assert len(event_time_vec) == 2 * partition_num - 1
    event_time = event_time_vec[-1]
    fusion_time = event_time["end"] - event_time["start"]
    fusion_time_vec.append(fusion_time)

# ---- mean and population standard deviation ----
sample_count = len(fusion_time_vec)
average_time = sum(fusion_time_vec) / sample_count
squared_deviation_sum = sum([(time - average_time) ** 2 for time in fusion_time_vec])
stddev_time = math.sqrt(squared_deviation_sum / sample_count)
samples_str = ["%.3e" % time for time in fusion_time_vec]
print(f"d {d}: average {average_time}, stddev {stddev_time}")

d 5: average 0.011682184330949955, stddev 0.11000249915106511
