In [7]:
import pandas as pd
from keras.models import load_model
import numpy as np
import sklearn
from sklearn.metrics import roc_curve

# Batch size handed to model.evaluate() inside evaluate_model.
k_batch_size = 96

# NOTE: pd.read_pickle can execute arbitrary code while unpickling; only point
# these paths at dataset files you trust.

# Test split: every column except the label 'is_signal_new' is a feature.
df_test = pd.read_pickle("../../datasets/topoprocessed/test.pkl")
x_test = df_test.loc[:, df_test.columns != 'is_signal_new']
y_test = df_test["is_signal_new"]
del df_test  # the full frame is no longer needed once x/y are extracted

# Validation split. The feature mask is built from df_val's own columns: the
# previous version borrowed df_train.columns for the mask, which is only
# correct when both frames have an identical column layout, and it required
# loading the whole training pickle just for its column names.
df_val = pd.read_pickle("../../datasets/topoprocessed/val.pkl")
x_val = df_val.loc[:, df_val.columns != 'is_signal_new']
y_val = df_val["is_signal_new"]
del df_val

# Feature variant to evaluate. Can change to '', '_pt0', '_pt', '_30', or
# '_standardize_pt'. The same string is appended to the saved-model path
# below; any other value (including '') leaves the features untouched.
mode = '_30'
if mode == '_standardize_pt':
    # Rescale every column whose name contains 'pt' to zero mean / unit std,
    # using each split's own statistics.
    # NOTE(review): presumably this mirrors the preprocessing used at training
    # time -- confirm against the training notebook.
    pt_cols = [name for name in x_test.columns if 'pt' in name]
    x_test[pt_cols] = x_test[pt_cols].sub(x_test[pt_cols].mean()).div(x_test[pt_cols].std())
    x_val[pt_cols] = x_val[pt_cols].sub(x_val[pt_cols].mean()).div(x_val[pt_cols].std())
elif mode == '_30':
    # Keep only the first 30 feature columns (by position).
    x_test = x_test.iloc[:, 0:30]
    x_val = x_val.iloc[:, 0:30]
elif mode == '_pt':
    # Drop every column whose name contains 'pt'; the list is derived from
    # x_test and applied to both splits.
    pt_cols = [name for name in x_test.columns if 'pt' in name]
    x_test = x_test.drop(columns=pt_cols)
    x_val = x_val.drop(columns=pt_cols)
elif mode == '_pt0':
    # Get rid of the pt_0 column only.
    x_test = x_test.loc[:, ~(x_test.columns == 'pt_0')]
    x_val = x_val.loc[:, ~(x_val.columns == 'pt_0')]

# Load the saved model whose path suffix matches the selected variant.
model = load_model('topodnnmodels/topodnnmodel' + mode)

In [8]:
def evaluate_model(model, x_val, y_val):
    """Print loss, accuracy, background rejection and AUC for one data split.

    Parameters
    ----------
    model
        Object exposing ``evaluate(x, y, batch_size=...)`` and ``predict(x)``.
    x_val
        Features, passed unchanged to ``model.evaluate`` and ``model.predict``.
    y_val
        True labels, passed to ``model.evaluate`` and ``roc_curve``.

    Returns
    -------
    tuple
        ``(fpr, tpr, auc)``: the false/true positive rate arrays returned by
        ``roc_curve`` and the area under that curve.

    Notes
    -----
    * Reads the module-level ``k_batch_size`` for the evaluation batch size.
    * ``score[1]`` is printed as accuracy; this assumes the model reports
      accuracy as its first metric -- TODO confirm against the training code.
    * Background rejection is ``1 / fpr`` at the first ROC point whose ``tpr``
      reaches the target signal efficiency. A target that is never reached
      prints nothing.
    """
    score = model.evaluate(x_val, y_val, batch_size=k_batch_size)
    print("Loss: " + str(score[0]))
    print("Acc: " + str(score[1]))

    fpr, tpr, _ = roc_curve(y_val, model.predict(x_val).ravel())

    # (tpr threshold, label used in the printed message). Kept as explicit
    # float literals so the comparisons match the original thresholds exactly.
    working_points = ((.30, "30"), (.50, "50"), (.80, "80"))
    for threshold, label in working_points:
        reached = np.flatnonzero(np.asarray(tpr) >= threshold)
        if reached.size == 0:
            # Target efficiency never reached (e.g. tpr is all NaN).
            continue
        fpr_at_point = fpr[reached[0]]
        # fpr == 0 means no background passed the cut: report infinite
        # rejection explicitly instead of dividing by zero.
        rejection = float("inf") if fpr_at_point == 0 else 1 / fpr_at_point
        # Plain '%': the former "\%" was an invalid escape sequence and put a
        # stray backslash into the printed message.
        print("Background rejection at " + label + "% signal efficiency:" +
              str(rejection))

    current = sklearn.metrics.auc(fpr, tpr)
    print("AUC:" + str(current))
    return fpr, tpr, current
    
# Report the same metrics for the validation split first, then the test split.
# The return value is bound to `_` so the cell does not echo the ROC arrays.
for split_heading, split_x, split_y in (
    ("Performance on validation set", x_val, y_val),
    ("Performance on test set", x_test, y_test),
):
    print(split_heading)
    _ = evaluate_model(model, split_x, split_y)

Performance on validation set
Loss: 0.24460259079933167
Acc: 0.8944640159606934
Background rejection at 30\% signal efficiency:266.1862615587847
Background rejection at 50\% signal efficiency:83.61120331950207
Background rejection at 80\% signal efficiency:16.512578874047367
AUC:0.9617752759676121
Performance on test set
Loss: 0.24348796904087067
Acc: 0.8946484923362732
Background rejection at 30\% signal efficiency:277.7359009628611
Background rejection at 50\% signal efficiency:90.54439461883408
Background rejection at 80\% signal efficiency:16.80935730935731
AUC:0.962256047025922


In [6]:
# Computes the model's predictions on x_test and keeps them in `to_save`.
# Nothing is written to disk as-is: the np.save call below is commented out.
# Uncomment it to store the flattened predictions under a file name that
# carries the current `mode` suffix.
# NOTE(review): this cell's execution count (6) is lower than the cells above
# (7, 8), so it was last run before them -- re-run top to bottom before saving.
to_save = model.predict(x_test)
#np.save('preds/topodnn' + mode + '.npy',to_save.ravel())