In [77]:
import os
import json
import pandas as pd

# Notebook-level `df` starts out unset.
df = None

# Render floats in displayed frames with thousands separators and four decimals.
pd.set_option("display.float_format", "{:,.4f}".format)

# Root directory under which results are looked up as <dataset>/<model>/<method>/.
base_dir = "."

# Directory name -> label used in the result tables.
datasets = {
    "joined": "Joined",
    "private_data": "Private",
}

models = {"bert_small": "BERT-small", "bert_mini": "BERT-mini"}

# Method directories passed to do_thing together with the GPU baseline.
compression_methods_gpu = {
    "static": "Static quant.",
    "qat": "Quant. aware training",
    "onnx_fp16_optimized": "Float16 optimized",
    "onnx_gpu_baseline": "GPU baseline",
    "onnx_optimized_baseline": "Optimized baseline",
}

# Method directories passed to do_thing together with the CPU baseline.
compression_methods_cpu = {
    "dynamic_quantization": "Dynamic quantization",
    "onnx_cpu_baseline": "CPU baseline",
}


_MODEL_THROUGHPUT_KEY = "time_n_per_s_forward_without_softmax"
_TOTAL_THROUGHPUT_KEY = "time_n_per_s_total_with_tokenizer"
_METRIC_COLUMNS = ["1_f1", "1_recall", "1_precision", "macro_f1", "roc_auc"]
_RESULT_COLUMNS = [
    "dataset",
    "model",
    "Compression method",
    *_METRIC_COLUMNS,
    "throughput_model",
    "throughput_total",
    "speedup_model",
    "speedup_total",
]


def _read_json(path):
    """Return the parsed JSON content of ``path``, or ``None`` if the file does not exist."""
    if not os.path.isfile(path):
        return None
    with open(path, "r") as handle:
        return json.load(handle)


def _speedup(throughput, baseline):
    """Return ``throughput / baseline``, or ``None`` if either is missing or the baseline is zero."""
    if throughput is None or not baseline:
        return None
    return throughput / baseline


def _format_throughput(value, speedup):
    """Render a throughput as ``"1,234 (2.50x)"``; the speedup part is omitted when unknown."""
    # Missing values arrive as None or as NaN (pandas converts None to NaN in
    # numeric columns), so `is None` alone is not a sufficient check.
    if pd.isnull(value):
        return None
    text = f"{int(round(value)):,}"
    if pd.isnull(speedup):
        return text
    return f"{text} ({speedup:.2f}x)"


def do_thing(compression_methods, dataset_target, baseline_name):
    """Display a metrics/throughput table for one dataset.

    For every model in ``models`` and every method in ``compression_methods``
    the directory ``<base_dir>/<dataset_target>/<model>/<method>/`` is inspected.
    A method contributes a row only if it has a ``performance.json``; quality
    metrics come from ``best_model_metrics.json`` when that file exists.

    Args:
        compression_methods: Mapping of method directory name -> label shown in
            the "Compression method" column.
        dataset_target: Key of ``datasets`` selecting the dataset directory. An
            unknown key produces an empty table.
        baseline_name: Method directory whose ``performance.json`` supplies the
            reference throughputs for the speedup factors.

    Returns:
        None. The table is rendered with ``display``. Rows are ordered by model
        label, then by model-throughput speedup in descending order.

    Notes:
        * When the baseline (or one of its throughput values) is missing or
          zero, the throughput is shown without a speedup factor.
        * A ``performance.json`` without a throughput value yields an empty
          throughput cell.
    """
    rows = []
    dataset_name = datasets.get(dataset_target)

    if dataset_name is not None:
        for model, model_name in models.items():
            model_dir = os.path.join(base_dir, dataset_target, model)
            baseline_perf = _read_json(os.path.join(model_dir, baseline_name, "performance.json")) or {}

            for comp_key, comp_name in compression_methods.items():
                comp_path = os.path.join(model_dir, comp_key)
                perf = _read_json(os.path.join(comp_path, "performance.json"))
                if perf is None:
                    # No performance measurement -> the method is not listed.
                    continue

                metrics = _read_json(os.path.join(comp_path, "best_model_metrics.json")) or {}
                class_one = metrics.get("1", {})
                thr_model = perf.get(_MODEL_THROUGHPUT_KEY)
                thr_total = perf.get(_TOTAL_THROUGHPUT_KEY)

                rows.append(
                    {
                        "dataset": dataset_name,
                        "model": model_name,
                        "Compression method": comp_name,
                        "1_f1": class_one.get("f1-score"),
                        "1_recall": class_one.get("recall"),
                        "1_precision": class_one.get("precision"),
                        "macro_f1": metrics.get("macro avg", {}).get("f1-score"),
                        "roc_auc": metrics.get("roc_auc_score"),
                        "throughput_model": thr_model,
                        "throughput_total": thr_total,
                        "speedup_model": _speedup(thr_model, baseline_perf.get(_MODEL_THROUGHPUT_KEY)),
                        "speedup_total": _speedup(thr_total, baseline_perf.get(_TOTAL_THROUGHPUT_KEY)),
                    }
                )

    # Explicit columns keep the frame well-formed even when no row was collected.
    table = pd.DataFrame(rows, columns=_RESULT_COLUMNS)

    # Reorder within each model by model-throughput speedup descending
    if not table.empty:
        table = table.sort_values(by=["model", "speedup_model"], ascending=[True, False])

    # Plain lists are assigned positionally, which also works for an empty frame
    # (a row-wise DataFrame.apply does not return a Series in that case).
    for thr_col, speed_col in (("throughput_model", "speedup_model"), ("throughput_total", "speedup_total")):
        table[thr_col] = [
            _format_throughput(value, speed) for value, speed in zip(table[thr_col], table[speed_col])
        ]

    for col in _METRIC_COLUMNS:
        table[col] = table[col].map(lambda x: "{:,.4f}".format(x) if pd.notnull(x) else None)

    table = table.drop(columns=["speedup_model", "speedup_total"])
    display(table)

In [78]:
# GPU methods on the "joined" dataset; speedups are relative to onnx_gpu_baseline.
do_thing(
    compression_methods=compression_methods_gpu,
    dataset_target="joined",
    baseline_name="onnx_gpu_baseline",
)

Unnamed: 0,dataset,model,Compression method,1_f1,1_recall,1_precision,macro_f1,roc_auc,throughput_model,throughput_total
7,Joined,BERT-mini,Float16 optimized,0.9293,0.921,0.9377,0.9543,0.9875,"8,624 (3.25x)","3,316 (1.83x)"
5,Joined,BERT-mini,Static quant.,0.925,0.9272,0.9227,0.9514,0.9879,"5,454 (2.05x)","2,404 (1.32x)"
6,Joined,BERT-mini,Quant. aware training,0.931,0.9158,0.9466,0.9555,0.9879,"5,454 (2.05x)","2,404 (1.32x)"
9,Joined,BERT-mini,Optimized baseline,0.9293,0.921,0.9378,0.9544,0.9875,"3,267 (1.23x)","2,060 (1.13x)"
8,Joined,BERT-mini,GPU baseline,0.9293,0.921,0.9378,0.9544,0.9875,"2,656 (1.00x)","1,815 (1.00x)"
2,Joined,BERT-small,Float16 optimized,0.9322,0.9148,0.9501,0.9563,0.9845,"4,469 (6.37x)","2,484 (3.99x)"
0,Joined,BERT-small,Static quant.,0.9306,0.9176,0.944,0.9553,0.9853,"3,286 (4.68x)","2,267 (3.64x)"
1,Joined,BERT-small,Quant. aware training,0.9333,0.9121,0.9555,0.9571,0.9853,"3,286 (4.68x)","2,267 (3.64x)"
4,Joined,BERT-small,Optimized baseline,0.9322,0.9149,0.9501,0.9563,0.9845,770 (1.10x),673 (1.08x)
3,Joined,BERT-small,GPU baseline,0.9322,0.9149,0.9501,0.9563,0.9845,702 (1.00x),622 (1.00x)


In [79]:
# GPU methods on the "private_data" dataset; speedups are relative to onnx_gpu_baseline.
do_thing(
    compression_methods=compression_methods_gpu,
    dataset_target="private_data",
    baseline_name="onnx_gpu_baseline",
)

Unnamed: 0,dataset,model,Compression method,1_f1,1_recall,1_precision,macro_f1,roc_auc,throughput_model,throughput_total
7,Private,BERT-mini,Float16 optimized,0.8625,0.8367,0.8899,0.9246,0.9589,"4,705 (2.94x)","1,701 (1.62x)"
5,Private,BERT-mini,Static quant.,0.8408,0.8461,0.8355,0.9123,0.957,"3,688 (2.31x)","1,530 (1.46x)"
6,Private,BERT-mini,Quant. aware training,0.8534,0.8149,0.8956,0.9197,0.9577,"3,688 (2.31x)","1,530 (1.46x)"
9,Private,BERT-mini,Optimized baseline,0.8625,0.8368,0.8899,0.9246,0.9589,"2,015 (1.26x)","1,206 (1.15x)"
8,Private,BERT-mini,GPU baseline,0.8625,0.8368,0.8899,0.9246,0.9589,"1,598 (1.00x)","1,049 (1.00x)"
2,Private,BERT-small,Float16 optimized,0.8649,0.8104,0.9272,0.9261,0.9599,"3,166 (6.43x)","1,573 (3.68x)"
0,Private,BERT-small,Static quant.,0.8595,0.8168,0.9069,0.9231,0.9565,"2,335 (4.74x)","1,357 (3.18x)"
1,Private,BERT-small,Quant. aware training,0.8486,0.78,0.9304,0.9174,0.9564,"2,335 (4.74x)","1,357 (3.18x)"
4,Private,BERT-small,Optimized baseline,0.8648,0.8103,0.9271,0.9261,0.9599,545 (1.11x),463 (1.08x)
3,Private,BERT-small,GPU baseline,0.8648,0.8103,0.9271,0.9261,0.9599,493 (1.00x),427 (1.00x)


In [80]:
# CPU methods on the "joined" dataset; speedups are relative to onnx_cpu_baseline.
do_thing(
    compression_methods=compression_methods_cpu,
    dataset_target="joined",
    baseline_name="onnx_cpu_baseline",
)

Unnamed: 0,dataset,model,Compression method,1_f1,1_recall,1_precision,macro_f1,roc_auc,throughput_model,throughput_total
2,Joined,BERT-mini,Dynamic quantization,0.928,0.9154,0.941,0.9536,0.987,198 (1.48x),191 (1.46x)
3,Joined,BERT-mini,CPU baseline,0.9289,0.9184,0.9396,0.9541,0.9872,134 (1.00x),131 (1.00x)
0,Joined,BERT-small,Dynamic quantization,0.9278,0.9126,0.9435,0.9535,0.9847,65 (2.62x),64 (2.60x)
1,Joined,BERT-small,CPU baseline,0.9289,0.9118,0.9467,0.9542,0.985,25 (1.00x),25 (1.00x)


In [81]:
# CPU methods on the "private_data" dataset; speedups are relative to onnx_cpu_baseline.
do_thing(
    compression_methods=compression_methods_cpu,
    dataset_target="private_data",
    baseline_name="onnx_cpu_baseline",
)

Unnamed: 0,dataset,model,Compression method,1_f1,1_recall,1_precision,macro_f1,roc_auc,throughput_model,throughput_total
2,Private,BERT-mini,Dynamic quantization,0.8495,0.8373,0.8621,0.9173,0.9554,85 (1.42x),82 (1.41x)
3,Private,BERT-mini,CPU baseline,0.8515,0.8325,0.8712,0.9184,0.9558,60 (1.00x),58 (1.00x)
0,Private,BERT-small,Dynamic quantization,0.8647,0.7908,0.9538,0.9262,0.9562,24 (1.19x),24 (1.19x)
1,Private,BERT-small,CPU baseline,0.8619,0.7997,0.9345,0.9245,0.9563,20 (1.00x),20 (1.00x)


In [82]:
rows = []
baseline_throughputs = {}  # not read anywhere in this cell

# Sub-directory name -> label shown in the "percentile" column (identical here).
numbers = {level: level for level in ("99.9", "99.99", "99.999", "99.9999")}
quant_dir = "./quantization_param_search_bert_small"

# One table row per sub-directory that contains a metrics file; others are skipped.
for number, number_name in numbers.items():
    comp_path = os.path.join(quant_dir, number)
    metrics_file = os.path.join(comp_path, "best_model_metrics.json")
    if not os.path.isfile(metrics_file):
        continue

    with open(metrics_file, "r") as f:
        metrics = json.load(f)

    # Entry stored under the key "1" of the metrics file.
    class_one = metrics.get("1", {})
    rows.append(
        {
            "percentile": number_name,
            "1_f1": class_one.get("f1-score"),
            "1_recall": class_one.get("recall"),
            "1_precision": class_one.get("precision"),
            "macro_f1": metrics.get("macro avg", {}).get("f1-score"),
            "roc_auc": metrics.get("roc_auc_score"),
        }
    )

df = pd.DataFrame(rows)
display(df)

Unnamed: 0,percentile,1_f1,1_recall,1_precision,macro_f1,roc_auc
0,99.9,0.862,0.8187,0.91,0.9244,0.9542
1,99.99,0.864,0.811,0.9243,0.9256,0.9555
2,99.999,0.8693,0.8102,0.9377,0.9286,0.9562
3,99.9999,0.8648,0.8031,0.9368,0.9262,0.9556
