In [None]:
# import the necessary libraries
import ROOT
from subprocess import call
from os.path import isfile

import warnings
# Silence every Python warning for the rest of the session: no category or
# module filter is given, so library deprecation warnings are hidden as well.
warnings.filterwarnings('ignore')

In [None]:
# a function for loading the data
def loaddata(name, data_dir="/home/lewis/particles", tree_name="t1"):
    """Open one particle sample and return its file handle and tree.

    Parameters
    ----------
    name : str
        Particle species; the file read is ``<data_dir>/evt_500k_<name>.root``.
    data_dir : str, optional
        Directory that holds the sample files.
    tree_name : str, optional
        Key of the object to fetch from the file.

    Returns
    -------
    tuple
        ``(file, particles)``: the opened ``TFile`` and the object stored
        under ``tree_name``, in that order.

    Raises
    ------
    IOError
        If the file cannot be opened or holds no object named ``tree_name``.
    """
    filestring = "{}/evt_500k_{}.root".format(data_dir, name)
    root_file = ROOT.TFile.Open(filestring, 'read')
    # A failed open yields a null or zombie handle; fail here with a clear
    # message instead of with an obscure error on the first use of the tree.
    if not root_file or root_file.IsZombie():
        raise IOError("could not open ROOT file '{}'".format(filestring))
    particles = root_file.Get(tree_name)
    if not particles:
        raise IOError("no object named '{}' in '{}'".format(tree_name, filestring))
    return root_file, particles

# Load every particle sample. Each loaddata() call returns (file, tree); the
# file handles are bound to e, pi, m, k, p and the trees to the species names.
(e, electrons), (pi, pions), (m, muons), (k, kaons), (p, protons) = (
    loaddata(species)
    for species in ('electrons', 'pions', 'muons', 'kaons', 'protons')
)

In [None]:
# Create the TMVA dataloader and register one input variable per branch of
# the electron tree (assumes every tree has the same branches — TODO confirm)
dataloader = ROOT.TMVA.DataLoader('dataset_pymva')
branch_list = electrons.GetListOfBranches()
numVariables = len(branch_list)
for input_branch in branch_list:
    dataloader.AddVariable(input_branch.GetName())

# Register each tree under its class name
for tree, class_name in (
    (electrons, 'Electrons'),
    (pions, 'Pions'),
    (muons, 'Muons'),
    (kaons, 'Kaons'),
    (protons, 'Protons'),
):
    dataloader.AddTree(tree, class_name)

# Empty cut: no selection is applied when preparing the train/test trees
no_cut = ROOT.TCut('')
dataloader.PrepareTrainingAndTestTree(no_cut, 'SplitMode=Random:NormMode=None:!V')

In [None]:
import numpy as np
import pandas as pd
from root_numpy import root2array, rec2array, list_branches
# Branch names of tree 't1', read once from the electron sample and reused for
# every species. The tree is named explicitly so that the lookup targets the
# same tree the arrays are read from, even if the file holds other trees.
branch_names = list_branches("/home/lewis/particles/evt_500k_electrons.root", treename='t1')

# load the same data, but in a numpy array
# this numpy array will be used for training a keras model
def loadtrainingdata(name, data_dir="/home/lewis/particles", tree_name="t1"):
    """Read one particle sample into a plain numpy array.

    Parameters
    ----------
    name : str
        Particle species; the file read is ``<data_dir>/evt_500k_<name>.root``.
    data_dir : str, optional
        Directory that holds the sample files.
    tree_name : str, optional
        Name of the tree to read inside the file.

    Returns
    -------
    array
        Result of ``rec2array`` applied to the branches listed in the
        module-level ``branch_names`` (presumably one row per event and one
        column per branch — TODO confirm). Its shape is printed as a check.
    """
    filestring = "{}/evt_500k_{}.root".format(data_dir, name)
    records = root2array(filestring, tree_name, branch_names)
    vals = rec2array(records)
    print(vals.shape)
    return vals


# load the training data as numpy arrays, one per particle class.
# The arrays are kept under their own name (class_arrays) instead of rebinding
# electrons/pions/..., which refer to the ROOT trees given to the TMVA
# dataloader; the dataloader cell therefore stays re-runnable.
class_names = ('electrons', 'pions', 'muons', 'kaons', 'protons')
class_arrays = [loadtrainingdata(class_name) for class_name in class_names]

# features: all classes stacked row-wise, in class_names order
X = np.concatenate(class_arrays)
# labels: the class index (position in class_names), repeated once per row
y = np.concatenate([class_index * np.ones(class_array.shape[0])
                    for class_index, class_array in enumerate(class_arrays)])
assert X.shape[0] == y.shape[0], "every feature row needs exactly one label"

# create the pandas dataframe: the feature columns followed by the label
# column, which is named 'temp'
df = pd.DataFrame(np.hstack((X, y.reshape(y.shape[0], -1))), columns=branch_names+['temp'])

In [None]:
from os import environ

# Keras chooses its backend while the package is being imported, and the
# import of to_categorical below is where keras gets loaded. The backend and
# the Theano compiler flags therefore have to be in the environment before
# that import; setting them afterwards has no effect on the running kernel.
environ['KERAS_BACKEND'] = 'theano'
environ['THEANO_FLAGS'] = 'gcc.cxxflags=-march=corei7'

from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical

num_classes = 5

# split the data into training and testing: every column but the last is a
# feature, the last column holds the class index
features = df.iloc[:, :-1]
labels = df.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.20, random_state=42)

# one hot encoding of the class index
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)

In [None]:
# Initialise TMVA and its Python method bridge
ROOT.TMVA.Tools.Instance()
ROOT.TMVA.PyMethodBase.PyInitialize()

# ROOT file handed to the factory as its output file; RECREATE overwrites any
# existing file of that name
outputFile = ROOT.TFile.Open('TMVAOutputPyMVA.root', 'RECREATE')

# Factory configured for a multiclass analysis
factory_options = (
    '!V:!Silent:Color:DrawProgressBar:Transformations=I,G:'
    'AnalysisType=Multiclass'
)
factory = ROOT.TMVA.Factory('TMVAClassification', outputFile, factory_options)

In [None]:
# Select Theano as backend for Keras
# NOTE(review): Keras reads KERAS_BACKEND (and Theano reads THEANO_FLAGS) when
# the package is first imported, so these assignments only take effect if they
# run before the first keras import in the kernel — confirm that no earlier
# cell has already imported keras.
from os import environ
environ['KERAS_BACKEND'] = 'theano'

# Set architecture of system (AVX instruction set is not supported on SWAN)
environ['THEANO_FLAGS'] = 'gcc.cxxflags=-march=corei7'

from keras.models import Sequential
from keras.layers.core import Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

# some variables
# batch_size and epochs are passed to model.fit further down in this cell;
# epochs is an upper bound, since the fit also uses an EarlyStopping callback.
batch_size = 4096
epochs = 1200



# Define model: a fully connected classifier with four hidden ReLU layers
# (128 -> 64 -> 32 -> 16 units), dropout, and a softmax output with one unit
# per class.
# The initializer is passed as `kernel_initializer`, the Keras 2 name of the
# argument; `init` is the Keras 1 spelling and does not match the Keras 2
# `epochs` argument used when fitting.
model = Sequential()
model.add(Dense(128, kernel_initializer='glorot_normal', activation='relu',
                input_dim=numVariables))
for hidden_units in (64, 32, 16):
    model.add(Dense(hidden_units, kernel_initializer='glorot_normal',
                    activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(num_classes, kernel_initializer='glorot_uniform',
                activation='softmax'))

# Set loss and optimizer
model.compile(loss='categorical_crossentropy', optimizer=Adam(),
              metrics=['accuracy'])

# Print summary of model
model.summary()

# Early stopping watches the validation loss with a patience of 100 epochs
early_stopping = EarlyStopping(monitor='val_loss', patience=100, verbose=True)

# Fit the network; the test split is passed as the validation data, so it is
# also what early stopping monitors
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    batch_size=batch_size,
    epochs=epochs,
    callbacks=[early_stopping],
    verbose=1,
)

# Store model to file; TMVA's PyKeras method is pointed at this file name
model.save('model.h5')

In [None]:
# Book the Keras model stored in model.h5 as a TMVA PyKeras method.
# NOTE(review): model.h5 was fitted in this notebook on the untransformed
# branch values, whereas VarTransform=G is requested for this method —
# confirm that this difference in input preprocessing is intended.
pykeras_options = (
    'H:!V:VarTransform=G:FilenameModel=model.h5:'
    'NumEpochs=1:BatchSize=32'
)
factory.BookMethod(dataloader, ROOT.TMVA.Types.kPyKeras, 'PyKeras', pykeras_options)

In [None]:
# Run training
# This will continue the training begun earlier: the PyKeras method was booked
# with FilenameModel=model.h5 (the model fitted and saved in this notebook)
# and NumEpochs=1.
factory.TrainAllMethods()

In [None]:
# Run testing and evaluation of the model
factory.TestAllMethods()
factory.EvaluateAllMethods()

# Close the TMVA output file so that everything written to it during training,
# testing and evaluation is flushed to TMVAOutputPyMVA.root
outputFile.Close()