In [1]:
import numpy as np
import pandas as pd
import pickle

def read_pickle(handle):
    """Load and return the object stored in a pickle file.

    Parameters
    ----------
    handle : str or path-like
        Location of the pickle file; it is opened in binary read mode.

    Returns
    -------
    object
        Whatever object was serialized into the file.

    Notes
    -----
    Unpickling can execute arbitrary code, so only use this on files from a
    trusted source.
    """
    # The context manager guarantees the file is closed even if unpickling
    # raises, instead of leaving the descriptor to the garbage collector.
    with open(handle, "rb") as pickle_file:
        return pickle.load(pickle_file)

# Load both evaluated result sets in order: the main models first, then the
# "random" counterparts (naming taken from the file names).
models, models_random = (
    read_pickle(pickle_path)
    for pickle_path in ('tables/models.evaluated.pkl',
                        'tables/models.random.evaluated.pkl')
)

In [2]:
def _summary_row(model, entry):
    """Flatten one model's evaluation record into a single summary-table row."""
    row = {
        'model_name': model,
        'mean_average_precision': entry['mean_average_precision'],
    }
    for k in range(1, 6):
        row['average_precision_at_k={}'.format(k)] = entry['ap@k'][k]
    row['description'] = entry['description']
    return row


def summarize_models(db, random):
    """Combine two collections of evaluated models into one summary table.

    Parameters
    ----------
    db : dict
        Maps a model name to its evaluation record. Each record must provide
        ``'mean_average_precision'``, ``'description'`` and ``'ap@k'``, where
        ``record['ap@k'][k]`` is read for ``k`` = 1..5.
    random : dict
        Same structure as ``db``; its rows are appended after those of ``db``.

    Returns
    -------
    pandas.DataFrame
        One row per model (``db`` entries first, then ``random`` entries, each
        in the mapping's iteration order) on a fresh 0..n-1 index, with the
        columns ``model_name``, ``mean_average_precision``,
        ``average_precision_at_k=1`` .. ``average_precision_at_k=5`` and
        ``description``. If both inputs are empty, an empty frame with these
        columns is returned.

    Raises
    ------
    KeyError
        If a record lacks one of the required keys.
    """
    columns = ['model_name',
               'mean_average_precision',
               'average_precision_at_k=1',
               'average_precision_at_k=2',
               'average_precision_at_k=3',
               'average_precision_at_k=4',
               'average_precision_at_k=5',
               'description']

    # Both inputs share one schema, so a single row builder serves both.
    rows = [
        _summary_row(model, entry)
        for source in (db, random)
        for model, entry in source.items()
    ]

    # Building the frame once from plain records lets pandas infer numeric
    # dtypes for the metric columns, and passing `columns` keeps the layout
    # well-defined even when there are no rows at all.
    return pd.DataFrame(rows, columns=columns)

In [3]:
# Build the summary table and rank it in descending order of precision at
# k=1, using mean average precision as the secondary sort key.
normal = (
    summarize_models(models, models_random)
    .sort_values(by=['average_precision_at_k=1', 'mean_average_precision'], ascending=False)
)
normal.to_csv('tables/models.performance.txt', index=False, sep='\t')

# Preview the five top-ranked rows, limited to the headline columns.
normal[['model_name', 'average_precision_at_k=1', 'mean_average_precision', 'description']].head(5)

Unnamed: 0,model_name,average_precision_at_k=1,mean_average_precision,description
12,snf_fda-cgc,0.193634,0.128741,"We perform similarity network fusion (SNF, Bo ..."
9,multi-pass-sort_fda-cgc,0.167109,0.130507,A weakness of agreement based measure is that ...
11,snf_cgc,0.161804,0.123493,Rather than collapse all data types into a sin...
10,snf_fda-cgc-genes,0.159151,0.127548,"Rather than perform a two-pass heuristic sort,..."
6,nonsynonymous-variant-count,0.158333,0.114908,We assign neighbors based on the absolute valu...


In [4]:
# Export the ranked summary table as an Excel workbook.
# NOTE(review): unlike the tab-separated export above (index=False), this call
# keeps pandas' default of writing the row index, so the sheet gains a leading
# column of pre-sort row numbers — confirm that is intended for the supplement.
normal.to_excel("tables/Supplementary Table 5.xlsx")