In [2]:
import pandas as pd
import numpy as np
import json

In [15]:
# Values substituted into the two `{}` fields of each path pattern
# ("sz" first, "sd" second).
# NOTE(review): presumably "sz" is the training-set size and "sd" the random
# seed of a run -- confirm against the experiment scripts.
SZs = list(range(1, 8, 2))  # [1, 3, 5, 7]
SDs = list(range(1, 7))     # [1, 2, 3, 4, 5, 6]

In [12]:
# Evaluation-file templates, looked up as path_patterns[dataset][model].
# Each template has two positional `{}` fields that are filled with (sz, sd).
path_patterns = dict(
    funsd=dict(
        laser='./outputs/funsd/funsd-sz{}-sd{}/decode_result/evaluation.json',
    ),
)


In [13]:
def load_laser_results(path_pattern, szs=None, sds=None):
    """Gather precision/recall/F1 values from a grid of evaluation JSON files.

    Parameters
    ----------
    path_pattern : str
        Template with two positional ``{}`` fields; it is formatted with
        ``(sz, sd)`` to obtain the path of each JSON file.
    szs : iterable, optional
        Values for the first field. Defaults to the module-level ``SZs``.
    sds : iterable, optional
        Values for the second field. Defaults to the module-level ``SDs``.

    Returns
    -------
    dict
        Maps each ``sz`` to ``[pres, recs, f1s]``: three lists holding the
        ``'pre'``, ``'rec'`` and ``'f1'`` entries of every file for that
        ``sz``, in ``sds`` iteration order. An ``sz`` with no ``sd`` values
        gets no entry.

    Raises
    ------
    OSError
        If a formatted path cannot be opened.
    json.JSONDecodeError
        If a file does not contain valid JSON.
    KeyError
        If a file lacks one of the ``'pre'``, ``'rec'`` or ``'f1'`` keys.
    """
    if szs is None:
        szs = SZs
    if sds is None:
        sds = SDs
    # Materialise once so a one-shot iterator can be replayed for every sz.
    sds = list(sds)

    results = {}
    for sz in szs:
        for sd in sds:
            path = path_pattern.format(sz, sd)
            # Context manager closes the handle deterministically; the bare
            # json.load(open(path)) leaked one descriptor per file.
            with open(path) as handle:
                info = json.load(handle)

            # Read all three keys before touching `results` so a malformed
            # file never leaves a partially updated entry.
            pre, rec, f1 = info['pre'], info['rec'], info['f1']

            pres, recs, f1s = results.setdefault(sz, [[], [], []])
            pres.append(pre)
            recs.append(rec)
            f1s.append(f1)
    return results

def avg(x):
    """Return the arithmetic mean of ``x``, or ``0`` when ``x`` has no items."""
    count = len(x)
    return sum(x) / count if count else 0

def round2(x):
    """Render ``x`` as a fixed-point string with two decimals.

    Note that the result is a ``str`` (e.g. ``'3.10'``), not a rounded number.
    """
    template = '%.2f'
    return template % x

def create_table(dataset_names, model_names):
    """Build a mean/std summary table of precision, recall and F-1.

    For every dataset/model pair the results are loaded through
    ``load_laser_results`` using ``path_patterns[dataset][model]``. Each cell
    is the string ``'<mean>_<std>'``, where both numbers are multiplied by 100
    and formatted with two decimals; the std comes from ``np.std`` with its
    default settings.

    Parameters
    ----------
    dataset_names : list
        Keys of ``path_patterns``; each contributes three columns.
    model_names : list
        Keys of ``path_patterns[dataset]``; each contributes one row per
        entry of ``SZs``.

    Returns
    -------
    pandas.DataFrame
        Rows indexed by ``(sz, model)``, columns by ``(dataset, metric)``
        with metrics ``'Precision'``, ``'Recall'``, ``'F-1'``.
    """
    metric_labels = ['Precision', 'Recall', 'F-1']
    n_metrics = len(metric_labels)
    n_models = len(model_names)

    def summarize(values):
        # One table cell: scaled mean and scaled std joined by an underscore.
        mean_text = round2(100 * avg(values))
        std_text = round2(np.std(values) * 100)
        return f'{mean_text}_{std_text}'

    # Row order follows product(SZs, model_names); column order follows
    # product(dataset_names, metric_labels) -- matching the indexes below.
    cells = [
        [0] * (len(dataset_names) * n_metrics)
        for _ in range(len(SZs) * n_models)
    ]

    for dataset_pos, dataset_name in enumerate(dataset_names):
        first_col = dataset_pos * n_metrics
        for model_pos, model in enumerate(model_names):
            per_size = load_laser_results(path_pattern=path_patterns[dataset_name][model])
            for size_pos, sz in enumerate(SZs):
                precisions, recalls, f1_scores = per_size[sz]
                row = cells[size_pos * n_models + model_pos]
                for offset, values in enumerate((precisions, recalls, f1_scores)):
                    row[first_col + offset] = summarize(values)

    row_index = pd.MultiIndex.from_product([SZs, model_names])
    col_index = pd.MultiIndex.from_product([dataset_names, metric_labels])
    return pd.DataFrame(cells, index=row_index, columns=col_index)

In [16]:
# Summarise the 'laser' model on the 'funsd' dataset and display the table.
dataset_names, model_names = ['funsd'], ['laser']
table = create_table(dataset_names=dataset_names, model_names=model_names)
table

Unnamed: 0_level_0,Unnamed: 1_level_0,funsd,funsd,funsd
Unnamed: 0_level_1,Unnamed: 1_level_1,Precision,Recall,F-1
1,laser,31.33_4.43,37.07_7.55,33.70_5.02
3,laser,43.12_1.45,47.05_6.43,44.85_3.92
5,laser,46.78_2.32,52.60_1.33,49.50_1.60
7,laser,50.15_3.36,53.87_3.00,51.88_2.63
