In [1]:
from sklearn import metrics
import pandas as pd
from epilepsy_prediction import load_model, data_preprocess
from fuse.eval.metrics.metrics_common import  CI

from copy import deepcopy
from statsmodels.stats.contingency_tables import mcnemar
from sklearn.metrics import confusion_matrix
from collections import OrderedDict
from fuse.eval.metrics.metrics_common import GroupAnalysis, CI, Filter
from fuse.eval.metrics.metrics_model_comparison import PairedBootstrap
from fuse.eval.metrics.classification.metrics_classification_common import (
    MetricAUCPR,
    MetricAUCROC,
    MetricAccuracy,
    MetricConfusion,
    MetricConfusionMatrix,
    MetricBSS,
    MetricROCCurve,
)
from fuse.eval.metrics.classification.metrics_model_comparison_common import (
    MetricDelongsTest,
    MetricMcnemarsTest,
)
from fuse.eval.evaluator import EvaluatorDefault

seed =543463469

  from .autonotebook import tqdm as notebook_tqdm


In [55]:
def prepare_fuse_data(clf,clf2,X,y):
    res = pd.DataFrame(columns=['pred','target','id'])
    res['id']=X.index
    res['pred1'] = clf.predict_proba(X)[:,1].squeeze()
    res['pred2'] = clf2.predict_proba(X)[:,1].squeeze()
    res['target'] = y.astype(int).values  
    return res

def fuse_string(results):
    sum = []
    for k,v in results['metrics'].items():
        if 'auc' in k:
            sum.append(k + f': {v['mean']:.2f} (upper: {v['conf_upper']:.2f}, lower: {v['conf_lower']:.2f})')
        if 'mcnemar' or 'delong' in k:
            sum.append(k + f': {v['p_value']:.2f}')
    return sum

def load_data(drug_name,file_name='/Users/yoavkt/Documents/epilepsy_data/{}_data.pkl'):
    data = pd.read_pickle(file_name.format(drug_name))
    return data['X'],data['y']

In [3]:
drug_name="carbamazepine"
clf_mk = load_model(drug_name)
drug_name="est_carba"
clf_bi = load_model(drug_name)
drug_name = 'carbamazepine'
X,y = load_data(drug_name)

In [4]:
auc = metrics.roc_auc_score(y, clf_mk.predict_proba(X)[:,1])
print(auc)

0.7411711952961847


In [5]:
auc = metrics.roc_auc_score(y, clf_bi.predict_proba(X)[:,1])
print(auc)

0.6307010834803918


In [6]:
spe_out_come = '/Users/yoavkt/Documents/outcome_break_vals.csv'
df_spec = pd.read_csv(spe_out_come)

In [7]:
metrics = OrderedDict([
            ("auc", CI(MetricAUCROC(pred="pred", target="target"), 
                       stratum="target", rnd_seed=seed)),
        ])

In [8]:
mat = confusion_matrix(y == clf_bi.predict(X), y==clf_mk.predict(X))
tes = mcnemar(mat)
print(tes)


pvalue      0.03806366960413442
statistic   71.0


In [56]:
metrics = OrderedDict([
            ("auc1", CI(MetricAUCROC(pred="pred1", target="target"), 
                       stratum="target", rnd_seed=seed)),
            ("auc2", CI(MetricAUCROC(pred="pred2", target="target"), 
                       stratum="target", rnd_seed=seed)),
            ("mcnemar_test", 
            MetricMcnemarsTest(pred1="pred1", pred2="pred2",target="target")),
            ('delong_test',
            MetricDelongsTest(pred1="pred1", pred2="pred2",target="target"))
        ])
evaluator = EvaluatorDefault()
results = evaluator.eval(ids=None, data=prepare_fuse_data(clf_bi,clf_mk,X,y), metrics=metrics) 


In [50]:
str(results)

"{'metrics': {'auc1': {'org': 0.6307010834803918, 'mean': 0.6305202378108457, 'std': 0.013209371726008591, 'conf_interval': 95, 'conf_lower': 0.6045142447617653, 'conf_upper': 0.655997865404918}, 'auc2': {'org': 0.6307010834803918, 'mean': 0.6305202378108457, 'std': 0.013209371726008591, 'conf_interval': 95, 'conf_lower': 0.6045142447617653, 'conf_upper': 0.655997865404918}, 'mcnemars_test': {'p_value': 1.0, 'statistic': 0.0, 'n1': 0.0, 'n2': 0.0}, 'delong_test': {'p_value': 2.2683800040159284e-16, 'z': -8.206971173643009, 'auc1': 0.6307010834803918, 'auc2': 0.7411711952961847, 'cov11': 0.00017924887655622966, 'cov12': 7.731947510668091e-05, 'cov22': 0.00015657571232559572}}}"