# Code to analyze trained models and plot learning (training/validation) and ROC curves
August 26, 2019


In [8]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import h5py

import subprocess as sp
import pickle


In [9]:
## M-L modules
import tensorflow.keras
from tensorflow.keras import layers, models, optimizers, callbacks  # or tensorflow.keras as keras
import tensorflow as tf
from sklearn.utils import shuffle
from sklearn.metrics import roc_curve, auc, roc_auc_score
from tensorflow.keras.models import load_model


In [10]:
%matplotlib widget

## Modules

In [1]:
def f_get_data(filename, num_samples=50000):
    '''
    Read events from an hdf5 file and return images, labels and weights.

    The file is indexed as hf['all_events'][<name>] with the dataset names
    'hist', 'y' and 'weight'. Only the first `num_samples` entries of each
    dataset are read.

    Parameters
    ----------
    filename : str
        Path of the hdf5 file to read.
    num_samples : int or None, optional
        Number of leading events to read (default 50000, the value that was
        previously hard-coded). None reads every event.

    Returns
    -------
    dict
        'images'  : the 'hist' data with one extra trailing axis of length 1,
        'labels'  : the 'y' data,
        'weights' : log(weight + 1) of the 'weight' data.

    Raises
    ------
    SystemError
        If the file cannot be opened. The underlying exception is printed
        together with the file name and chained as the cause.
    '''
    try:
        # Open explicitly read-only so that loading data can never create or
        # modify a file, whatever the default mode of the installed h5py is.
        hf = h5py.File(filename, 'r')
    except Exception as e:
        # The original handler printed an unbound name `e`, which raised
        # NameError and hid the real failure; bind the exception properly.
        print(e)
        print("Name of file",filename)
        raise SystemError("Could not open hdf5 file: {0}".format(filename)) from e

    # Slicing copies the data into memory, so the file can be closed as soon
    # as the three arrays are read; the `with` block guarantees the close.
    with hf:
        events = hf['all_events']
        images = np.expand_dims(events['hist'][:num_samples], -1)
        labels = events['y'][:num_samples]
        weights = events['weight'][:num_samples]

    # Compress the dynamic range of the event weights.
    weights = np.log(weights+1)

    keys=['images','labels','weights']
    values_dict=dict(zip(keys,[images,labels,weights]))

    return values_dict


def f_plot_learning(history):
    '''
    Plot learning curves: accuracy (top panel) and loss (bottom panel).

    Parameters
    ----------
    history : dict
        Per-epoch training history. Must contain 'loss' and 'val_loss', plus
        the accuracy series under either 'acc'/'val_acc' or
        'accuracy'/'val_accuracy'.

    Returns
    -------
    None
        The figure is created through pyplot and left as the current figure.

    Raises
    ------
    KeyError
        If a required series is missing from `history`.
    '''
    # The accuracy key depends on how the metric was named at training time
    # ('acc' vs 'accuracy'); accept both so saved histories of either kind plot.
    acc_key = 'acc' if 'acc' in history else 'accuracy'
    val_acc_key = 'val_' + acc_key

    n_epochs = len(history[acc_key])
    # One tick every second epoch keeps the x axis readable.
    epoch_ticks = np.arange(0, n_epochs, 2)

    fig = plt.figure()

    # Top panel: training & validation accuracy
    ax_acc = fig.add_subplot(2,1,1)
    ax_acc.plot(history[acc_key], label='Train', marker='o')
    ax_acc.plot(history[val_acc_key], label='Validation', marker='*')
    ax_acc.set_ylabel('Accuracy')
    ax_acc.set_xticks(epoch_ticks)
    # The curves were labelled but no legend was drawn on this panel before.
    ax_acc.legend(loc='best')

    # Bottom panel: training & validation loss
    ax_loss = fig.add_subplot(2,1,2)
    ax_loss.plot(history['loss'], label='Train', marker='o')
    ax_loss.plot(history['val_loss'], label='Validation', marker='*')
    ax_loss.set_ylabel('Loss')
    ax_loss.set_xlabel('Epoch')
    ax_loss.set_xticks(epoch_ticks)
    ax_loss.legend(loc='best')



def f_plot_roc_curve(fpr,tpr):
    '''
    Draw a ROC curve and print the area under it.

    The (fpr, tpr) points are drawn both as scatter markers and as a line on
    a logarithmic x axis, together with the y=x reference line. After the
    figure is shown, the AUC computed from the same points is printed.
    '''
    plt.figure()
    axes = plt.gca()

    # ROC points: markers first, then the connecting line on a log-x scale
    axes.scatter(fpr, tpr)
    axes.semilogx(fpr, tpr)

    # Zoom: false-positive rate from 1e-7 to 1, true-positive rate from 0 to 1
    axes.set_xlim([10**-7, 1.0])
    axes.set_ylim([0, 1.0])

    # y=x line for comparison
    diagonal = np.linspace(0, 1, num=500)
    axes.plot(diagonal, diagonal)

    plt.show()

    # Area under the curve
    print("AUC: ", auc(fpr, tpr))

## Read stored model

In [None]:
# Directory (relative to the notebook's working directory) holding the saved model and history
model_save_dir='saved_data/'
# Load model and history
# NOTE(review): fname_model and fname_history are not defined in any cell visible
# in this notebook; they must be set (e.g. in a config cell above) before this
# cell can run on a fresh kernel.
model=load_model(model_save_dir+fname_model)
# NOTE(review): pickle.load can execute arbitrary code; only load history files
# that come from a trusted source.
with open(model_save_dir+fname_history,'rb') as f:
    history= pickle.load(f)

In [20]:
# Print the layer-by-layer summary (output shapes and parameter counts) of the loaded model
model.summary()
# Plot the learning curves (accuracy and loss, train vs validation) stored in `history`
f_plot_learning(history)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 64, 64, 1)         0         
_________________________________________________________________
conv2d (Conv2D)              (None, 64, 64, 10)        100       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 32, 32, 10)        0         
_________________________________________________________________
dropout (Dropout)            (None, 32, 32, 10)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 32, 32, 10)        910       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 16, 16, 10)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 16, 16, 10)        0         
__________

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### Test data

In [None]:
### Load the data used to test the model
# NOTE(review): hard-coded absolute path; it must exist on the machine running
# this notebook. Consider moving it to a configuration cell.
data_dir='/global/project/projectdirs/dasrepo/vpa/atlas_cnn/data/RPVSusyData/'

#### Test_data
# NOTE(review): the file is named 'val.h5' but is used as the test set here;
# confirm it was not also used for validation during training.
filename=data_dir+'val.h5'
# f_get_data returns a dict with keys 'images', 'labels' and 'weights'
test_data_dict=f_get_data(filename)

In [23]:
# Unpack the dict returned by f_get_data into images, labels and (log-transformed) weights
test_x,test_y,test_wts=test_data_dict['images'],test_data_dict['labels'],test_data_dict['weights']
# Sanity check: the three arrays must have the same number of events
print(test_x.shape,test_y.shape,test_wts.shape)

(50000, 64, 64, 1) (50000,) (50000,)


### Predictions and ROC curve

In [22]:
# Make predictions with the loaded model on the test images
y_pred=model.predict(test_x,verbose=1)

# Weighted ROC curve: false-positive rate, true-positive rate and the score thresholds.
# NOTE(review): test_wts are the log(weight+1) values produced by f_get_data, not
# the raw event weights; confirm this is the intended weighting for the ROC curve.
fpr,tpr,threshold=roc_curve(test_y,y_pred,sample_weight=test_wts)
print(fpr.shape,tpr.shape,threshold.shape)
# Plot the ROC curve and print the AUC
f_plot_roc_curve(fpr,tpr)

(14125,) (14125,) (14125,)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

AUC:  0.9172504911841929
