In [1]:
from sklearn import metrics
import pandas as pd
from epilepsy_prediction import load_model, data_preprocess
from fuse.eval.metrics.metrics_common import  CI

from copy import deepcopy
from statsmodels.stats.contingency_tables import mcnemar
from sklearn.metrics import confusion_matrix
from collections import OrderedDict
from fuse.eval.metrics.metrics_common import CI
from fuse.eval.metrics.classification.metrics_classification_common import (
    MetricAUCROC,
)
from fuse.eval.metrics.classification.metrics_model_comparison_common import (
    MetricDelongsTest,
    MetricMcnemarsTest,
)
from fuse.eval.evaluator import EvaluatorDefault

seed =543463469

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def prepare_fuse_data(clf,clf2,X,y):
    res = pd.DataFrame(columns=['pred','target','id'])
    res['id']=X.index
    res['pred1'] = clf.predict(X).squeeze()
    res['pred2'] = clf2.predict(X).squeeze()
    res['pred_proba1'] = clf.predict_proba(X)[:,1].squeeze()
    res['pred_proba2'] = clf2.predict_proba(X)[:,1].squeeze()
    res['target'] = y.astype(int).values  
    return res

def fuse_string(results):
    sum = []
    for k,v in results['metrics'].items():
        if 'auc' in k:
            sum.append(k + f": {v['mean']:.2f} (upper: {v['conf_upper']:.2f}, lower: {v['conf_lower']:.2f}) \n ")
        if 'mcnemar' in k or 'delong' in k:
            sum.append(k + f": p-value {v['p_value']:.3f} \n ")
    return ''.join(sum)

def load_data(drug_name,file_name='/Users/yoavkt/Documents/epilepsy_data/{}_data.pkl'):
    data = pd.read_pickle(file_name.format(drug_name))
    return data['X'],data['y']

In [3]:
drug_name="carbamazepine"
clf_mk = load_model(drug_name)
drug_name="est_carba"
clf_bi = load_model(drug_name)
drug_name = 'carbamazepine'
X,y = load_data(drug_name)

In [4]:
metrics = OrderedDict([
                ("auc1", CI(MetricAUCROC(pred="pred1", target="target"), 
                       stratum="target", rnd_seed=seed)),
            ("auc2", CI(MetricAUCROC(pred="pred2", target="target"), 
                       stratum="target", rnd_seed=seed)),
            ("mcnemar_test", 
            MetricMcnemarsTest(pred1="pred1", pred2="pred2",target="target")),
            ('delong_test',
            MetricDelongsTest(pred1="pred_proba1", pred2="pred_proba2",target="target"))
        ])
evaluator = EvaluatorDefault()
results = evaluator.eval(ids=None, data=prepare_fuse_data(clf_bi,clf_mk,X,y), metrics=metrics) 


Results:

Metric auc1:
------------------------------------------------
org:
0.5
mean:
0.5
std:
0.0
conf_interval:
95
conf_lower:
0.5
conf_upper:
0.5

Metric auc2:
------------------------------------------------
org:
0.5814392333952392
mean:
0.5813652127466922
std:
0.009033053817870744
conf_interval:
95
conf_lower:
0.5640523014325676
conf_upper:
0.5995515038492067

Metric mcnemar_test:
------------------------------------------------
p_value:
0.03806366960413442
statistic:
71.0
n1:
71.0
n2:
99.0

Metric delong_test:
------------------------------------------------
p_value:
2.2683800040159284e-16
z:
-8.206971173643009
auc1:
0.6307010834803918
auc2:
0.7411711952961847
cov11:
0.00017924887655622966
cov12:
7.731947510668091e-05
cov22:
0.00015657571232559572



In [5]:
fuse_string(results)

'auc1: 0.50 (upper: 0.50, lower: 0.50) \n auc2: 0.58 (upper: 0.60, lower: 0.56) \n mcnemar_test: p-value 0.038 \n delong_test: p-value 0.000 \n '