In [37]:
!pip install keras_sequential_ascii

import numpy as np

import matplotlib.pyplot as plt

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, LSTM
from keras.layers import Conv1D, MaxPooling1D, Conv2D, MaxPooling2D
from keras.layers.core import Reshape
from keras_sequential_ascii import sequential_model_to_ascii_printout
from keras.utils import plot_model
from keras.utils import np_utils
from keras.optimizers import SGD
from keras.constraints import maxnorm
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from keras.models import load_model
from keras import metrics

from sklearn.utils import shuffle

import random



In [38]:
# Connect gDrive to gColab.
# Mounts Google Drive at /content/gdrive; the copy cell further down reads its
# input files from "/content/gdrive/My Drive/...".
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime, so this cell is expected to fail on a local kernel - confirm.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
# Dataset variables: number of events taken for each consecutive slice of the
# shuffled data. In this notebook the "test" slice is the one passed to fit()
# as validation_data (it drives early stopping and checkpointing), while the
# "val" slice is only evaluated after training.
num_train = 60000
num_test = 15000
num_val = 15000

# Detector range: eta is histogrammed over (-eta_max, eta_max).
# ATLAS = abs(eta) < 2.5
# STAR - abs(eta) < 1.0
eta_max = 2.5

In [0]:
# Number of histogram bins along eta and along phi. These also fix the model
# input shape (Neta, Nphi) used by base_model().
Neta = 10
Nphi = 10

In [0]:
# Copy data files to Colab.
# The two files are copied from the mounted Drive into the working directory
# under the short names "sig.txt" and "bg.txt", which the next cell opens.
# Events from sig.txt are later labelled 1 and events from bg.txt labelled 0.
!cp "/content/gdrive/My Drive/PPSSP/data/LHC/Difr/Difr00.txt" "sig.txt"
!cp "/content/gdrive/My Drive/PPSSP/data/LHC/MinBias/MinBias00.txt" "bg.txt"

In [0]:
# Read files into arrays of per-event token lists
def _read_events(path):
  """Read one event per line from `path`.

  Each line is split on whitespace; the tokens are kept as strings and are
  converted to floats later, inside histogram().

  Returns:
    NumPy object array with one entry per line. When the lines have different
    numbers of tokens this is a 1-D array of lists.
  """
  with open(path) as handle:
    rows = [line.split() for line in handle]
  # dtype=object is explicit because rows may have different lengths: older
  # NumPy built an object array implicitly, newer NumPy raises on ragged input.
  return np.array(rows, dtype = object)

pythia_sig = _read_events("sig.txt")
pythia_bg = _read_events("bg.txt")

In [0]:
# Class labels as column vectors, one row per event:
# 1 marks a signal event, 0 marks a background event.
n_sig_events = len(pythia_sig)
n_bg_events = len(pythia_bg)
pythia_sig_tag = np.full((n_sig_events, 1), fill_value = 1)
pythia_bg_tag = np.full((n_bg_events, 1), fill_value = 0)

In [0]:
# Stack signal events ahead of background events along the first axis. The
# labels are stacked in the same order, so row i of `pythia_tag` labels row i
# of `pythia`.
pythia = np.concatenate([pythia_sig, pythia_bg])
pythia_tag = np.concatenate([pythia_sig_tag, pythia_bg_tag])

In [0]:
# Putting the data into a histogram
def histogram(data, n_eta=None, n_phi=None, eta_lim=None):
  """Bin every event into a pT-weighted (eta, phi) histogram.

  Each entry of `data` is a flat sequence of particle values laid out as
  pT, eta, phi, pT, eta, phi, ...; the values may be numeric strings.

  Args:
    data: sequence of events (list or NumPy array), one event per entry.
    n_eta: number of eta bins; defaults to the notebook-level `Neta`.
    n_phi: number of phi bins; defaults to the notebook-level `Nphi`.
    eta_lim: eta is binned over (-eta_lim, eta_lim); defaults to the
      notebook-level `eta_max`.

  Returns:
    Float array of shape (len(data), n_eta, n_phi) with the summed pT per
    cell. Particles outside the eta or phi range are not counted. An event
    without particles yields an all-zero histogram.

  Raises:
    ValueError: if an event's length is not a multiple of three.
  """
  # Fall back to the notebook globals only when no explicit value is given.
  n_eta = Neta if n_eta is None else n_eta
  n_phi = Nphi if n_phi is None else n_phi
  eta_lim = eta_max if eta_lim is None else eta_lim

  # Histogram parameters are identical for every event, so build them once.
  # NOTE(review): phi is binned over (0, 2*pi); if the input files store phi
  # in (-pi, pi) the negative-phi particles are silently dropped - confirm.
  bins = (n_eta, n_phi)
  ranges = ((-eta_lim, eta_lim), (0, 2*np.pi))

  out = np.zeros((len(data), n_eta, n_phi))
  for i, event in enumerate(data):
    n_values = len(event)
    if n_values == 0:
      # No particles: the pre-zeroed histogram is already the answer.
      continue
    if n_values % 3 != 0:
      raise ValueError("event %d has %d values; expected a multiple of 3 (pT, eta, phi)" % (i, n_values))

    # De-interleave the flat record into its three per-particle columns.
    pT = np.array(event[0::3], dtype = float)
    eta = np.array(event[1::3], dtype = float)
    phi = np.array(event[2::3], dtype = float)

    # histogram2d returns (counts, eta_edges, phi_edges); only counts is kept.
    counts, _, _ = np.histogram2d(eta, phi, bins = bins, range = ranges, weights = pT)
    out[i] = counts
  return out

In [0]:
# Replace the raw per-event token records with their (Neta, Nphi) histograms.
# NOTE(review): `pythia` is overwritten with a different kind of data, so this
# cell is not idempotent - re-running it without re-running the cells above
# feeds histograms back into histogram().
pythia = histogram(pythia)

In [0]:
# Shuffle events and labels with the same permutation so signal and background
# are mixed before the data is sliced into train/test/val. The fixed
# random_state makes the split identical on every run.
pythia, pythia_tag = shuffle(pythia, pythia_tag, random_state = 42)

In [0]:
# Cumulative row boundaries of the consecutive splits:
# [0, num_train) train, [num_train, num_fit) test, [num_fit, num_total) val.
num_fit = num_train + num_test
num_total = num_fit + num_val

In [0]:
# Row ranges of the three consecutive splits of the shuffled data.
train_rows = slice(0, num_train)
test_rows = slice(num_train, num_fit)
val_rows = slice(num_fit, num_total)

# Histogram inputs for each split.
xx_train_pythia = pythia[train_rows]
xx_test_pythia = pythia[test_rows]
xx_val_pythia = pythia[val_rows]

# Labels for each split, one-hot encoded over the two classes.
yy_train_pythia = np_utils.to_categorical(pythia_tag[train_rows], 2)
yy_test_pythia = np_utils.to_categorical(pythia_tag[test_rows], 2)
yy_val_pythia = np_utils.to_categorical(pythia_tag[val_rows], 2)


In [0]:
# Training hyper-parameters passed to fit().
batch_size, epochs = 32, 64

# Dataset actually used by the model cells below: generic aliases for the
# PYTHIA splits, so the model code does not depend on the sample's name.
xx_train, xx_test, xx_val = xx_train_pythia, xx_test_pythia, xx_val_pythia
yy_train, yy_test, yy_val = yy_train_pythia, yy_test_pythia, yy_val_pythia

In [0]:
# Define Model
def base_model():
  """Build and compile the CNN classifier for (Neta, Nphi) histograms.

  The network is: reshape to a single-channel image, one 2x2 convolution,
  two fully connected ReLU layers (each stage followed by dropout) and a
  2-unit softmax output.

  Returns:
    A compiled Sequential model (categorical cross-entropy loss, Adam
    optimizer, categorical accuracy metric). The model is not trained here.
  """
  stack = [
    # Append a channel axis so Conv2D can consume the 2-D histogram.
    Reshape((Neta, Nphi, 1), input_shape=(Neta, Nphi, )),

    # Convolutional feature extractor.
    Conv2D(64, kernel_size = 2, activation = 'relu'),
    Dropout(rate = 0.2),

    Flatten(),

    # Fully connected classifier head.
    Dense(320, activation = 'relu'),
    Dropout(rate = 0.2),

    Dense(160, activation = 'relu'),
    Dropout(rate = 0.3),

    # One output per class.
    Dense(2, activation = 'softmax'),
  ]

  model = Sequential()
  for layer in stack:
    model.add(layer)

  # Compile only; training happens in the cell that calls fit().
  model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = [metrics.categorical_accuracy])
  return model

In [52]:
# Visualizing model structure
# Build a fresh model and print its layer-by-layer summary (ascii).
cnn_n = base_model()
cnn_n.summary()

# Alternative ascii printout from keras_sequential_ascii (disabled):
# sequential_model_to_ascii_printout(cnn_n)

# Plotting model in graphical mode
plot_model(cnn_n)   # optional arguments tried earlier: to_file='model.png', show_shapes=True

W0807 18:08:35.011570 140383538726784 deprecation_wrapper.py:119] From /usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0807 18:08:35.042845 140383538726784 deprecation_wrapper.py:119] From /usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0807 18:08:35.082474 140383538726784 deprecation_wrapper.py:119] From /usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.

W0807 18:08:35.095799 140383538726784 deprecation.py:506] From /usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Ins

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_1 (Reshape)          (None, 10, 10, 1)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 9, 9, 64)          320       
_________________________________________________________________
dropout_1 (Dropout)          (None, 9, 9, 64)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 5184)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 320)               1659200   
_________________________________________________________________
dropout_2 (Dropout)          (None, 320)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 160)               51360     
__________

In [0]:
# Both callbacks watch the accuracy on the data passed as validation_data.
monitored_metric = 'val_categorical_accuracy'
checkpoint_file = 'best_model_conventional_2.h5'

# Patient early stopping: tolerate 5 epochs without improvement.
es = EarlyStopping(monitor = monitored_metric, mode = 'max', patience = 5, verbose = 1)
# Keep on disk only the model with the best monitored accuracy so far.
mc = ModelCheckpoint(checkpoint_file, monitor = monitored_metric, mode = 'max', save_best_only = True, verbose = 1)

# Fit the model; `cnn` is the returned History object used by the plot cell.
cnn = cnn_n.fit(
  xx_train, yy_train,
  batch_size = batch_size,
  epochs = epochs,
  validation_data = (xx_test, yy_test),
  shuffle = True,
  callbacks = [es, mc]
)

# Reload the best checkpoint written by ModelCheckpoint.
saved_model = load_model(checkpoint_file)


W0807 18:08:35.704194 140383538726784 deprecation.py:323] From /usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/math_grad.py:1250: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 60000 samples, validate on 15000 samples
Epoch 1/64
 8896/60000 [===>..........................] - ETA: 1:06 - loss: 0.3804 - categorical_accuracy: 0.8278

In [0]:

# Evaluate the reloaded checkpoint on every split. Index 1 of each result is
# read as the accuracy (index 0 being the loss).
scores_train = saved_model.evaluate(xx_train, yy_train, verbose = 0)
scores_test = saved_model.evaluate(xx_test, yy_test, verbose = 0)
scores_val = saved_model.evaluate(xx_val, yy_val, verbose = 0)

# Convert the three accuracies to percentages and report them on one line.
accuracy_pct = tuple(scores[1]*100 for scores in (scores_train, scores_test, scores_val))
print("Accuracy: Train: %.2f%%, Test: %.2f%%, Val: %.2f%% " % accuracy_pct)

In [0]:
# Plots for training and testing process: loss and accuracy

n_epochs = len(cnn.history['loss'])

def _plot_history(fig_num, train_key, val_key, ylabel, title):
  """Plot one training curve (red) against its validation curve (green).

  Args:
    fig_num: matplotlib figure number to draw into.
    train_key: key of the training series in `cnn.history`.
    val_key: key of the validation series in `cnn.history`.
    ylabel: label of the y axis.
    title: figure title.
  """
  # The size is given when the figure is created: assigning
  # plt.rcParams['figure.figsize'] after plt.figure() does not resize the
  # figure that already exists.
  plt.figure(fig_num, figsize = (8, 6))
  plt.plot(cnn.history[train_key], 'r')
  plt.plot(cnn.history[val_key], 'g')
  plt.xticks(np.arange(0, n_epochs, 10.0))
  plt.xlabel("Num of Epochs")
  plt.ylabel(ylabel)
  plt.title(title)
  plt.legend(['train', 'validation'])

_plot_history(0, 'categorical_accuracy', 'val_categorical_accuracy',
              "Accuracy", "Training Cat_Accuracy vs Validation Cat_Accuracy")
_plot_history(1, 'loss', 'val_loss',
              "Loss", "Training Loss vs Validation Loss")
plt.show()

In [0]:
# Confusion matrix result

from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sn
import pandas  as pd

# Class probabilities for the held-out split, predicted in a single batch
# (batch_size is set to the split size), then reduced to class indices.
YY_pred = saved_model.predict(xx_val, batch_size = num_val, verbose = 2)
yy_pred = np.argmax(YY_pred, axis = 1)
yy_true = np.argmax(yy_val, axis = 1)

# Class indices of the split used as validation_data during fit(); not used
# further in this cell.
yy_test2 = np.argmax(yy_test, axis = 1)

# Confusion matrix: rows are true classes, columns are predicted classes.
# labels=[0, 1] keeps the matrix 2x2 even if one class never occurs, which the
# 2x2 DataFrame below requires.
cm = confusion_matrix(yy_true, yy_pred, labels = [0, 1])
print(cm)

# Visualizing the confusion matrix
df_cm = pd.DataFrame(cm, range(2), range(2))
plt.figure(figsize = (10,7))
sn.set(font_scale = 1.4)  # label size
# fmt='d' prints the counts as plain integers; the default format switches to
# scientific notation for counts in the thousands.
ax = sn.heatmap(df_cm, annot = True, fmt = 'd', annot_kws = {"size": 12})
ax.set_xlabel("Predicted class")
ax.set_ylabel("True class")
plt.show()