# Experiments with Neural networks

In [1]:
%matplotlib inline

In [2]:
import pandas
import numpy
import root_numpy
from hep_ml.nnet import MLPMultiClassifier
from rep.metaml import FoldingClassifier
from rep.estimators import SklearnClassifier
from matplotlib import pyplot as plt
from sklearn.metrics import roc_auc_score

## Reading features

In [3]:
import json
# Load the feature-name lists grouped by family. The 'PV' family is removed
# from the mapping and kept aside in its own variable.
with open('features.json') as features_file:
    feature_families = json.load(features_file)
pv_features = feature_families.pop('PV')

In [4]:
train_features = numpy.concatenate(feature_families.values())
print len(train_features)
print train_features

60
[u'MuonNShared' u'MuonIsLooseMuon' u'MuonIsMuon' u'MuonBkgLL' u'MuonMuLL'
 u'TrackFitVeloChi2' u'TrackFitVeloNDoF' u'TrackFitMatchChi2'
 u'TrackGhostProbability' u'TrackP' u'TrackChi2PerDof' u'TrackFitTChi2'
 u'TrackPt' u'TrackNumDof' u'TrackFitTNDoF' u'TrackDOCA' u'InAccSpd'
 u'InAccPrs' u'InAccBrem' u'InAccEcal' u'InAccHcal' u'InAccMuon'
 u'CombDLLmu' u'CombDLLpi' u'CombDLLp' u'CombDLLe' u'CombDLLk'
 u'RichAboveMuThres' u'RichAboveElThres' u'RichAbovePiThres'
 u'RichAboveKaThres' u'RichAbovePrThres' u'RichUsedR1Gas' u'RichUsedR2Gas'
 u'RichDLLbt' u'RichDLLpi' u'RichDLLe' u'RichDLLp' u'RichDLLmu' u'RichDLLk'
 u'CaloBremMatch' u'CaloElectronMatch' u'CaloTrMatch' u'CaloTrajectoryL'
 u'CaloChargedSpd' u'CaloChargedPrs' u'CaloChargedEcal' u'CaloNeutralSpd'
 u'CaloNeutralPrs' u'CaloNeutralEcal' u'CaloSpdE' u'CaloPrsE' u'CaloEcalE'
 u'CaloHcalE' u'EcalPIDmu' u'HcalPIDmu' u'PrsPIDe' u'BremPIDe' u'EcalPIDe'
 u'HcalPIDe']


## Read the data

In [5]:
from utils import shrink_floats

# Load the 'tree' tree of the training ROOT file into a DataFrame.
data_train = pandas.DataFrame(root_numpy.root2array('../data/global_train.root', 'tree', step=1))
# Called only for its side effect on the frame (the return value is discarded);
# presumably downcasts float columns to save memory -- TODO confirm in utils.
shrink_floats(data_train)

# Same procedure for the test sample.
data_test_full  = pandas.DataFrame(root_numpy.root2array('../data/global_test.root', 'tree', step=1))
shrink_floats(data_test_full)

In [6]:
# Create a synonym: data_test is the same object as data_test_full (no copy),
# so columns added through one name are visible through the other.
data_test = data_test_full

In [7]:
from collections import Counter
# Count the training rows for each distinct MCParticleType value.
Counter(data_train.MCParticleType)

Counter({-2212: 481659,
         -321: 493085,
         -211: 499043,
         -13: 499794,
         -11: 501496,
         0: 999995,
         11: 498508,
         13: 500207,
         211: 500963,
         321: 506908,
         2212: 518335})

In [8]:
# Preview the first rows of the training frame.
data_train.head()

Unnamed: 0,VeloCharge,BremPIDe,CaloNeutralPrs,CaloNeutralSpd,InAccBrem,InAccSpd,CaloPrsE,InAccPrs,HcalPIDe,CaloHcalE,...,piplus_OWNPV_XERR,piplus_OWNPV_YERR,piplus_OWNPV_ZERR,piplus_OWNPV_CHI2,piplus_OWNPV_NDOF,piplus_IP_OWNPV,piplus_IPCHI2_OWNPV,nCandidate,totCandidates,EventInSequence
0,1.02809,-999,-999,-999,0,0,-999.0,0,-999.0,-999.0,...,0.0096,0.0096,0.0532,33.816654,83,0.11134,15.622943,16,37,9099
1,1.044008,-999,-999,-999,0,0,-999.0,0,-999.0,-999.0,...,0.0064,0.0063,0.0358,67.432457,187,0.025907,0.510528,38,106,12220
2,0.853933,-999,-999,-999,0,1,155.237808,1,-1.92618,27002.507812,...,0.0108,0.0108,0.0557,29.747982,67,0.136919,5.52092,14,76,8573
3,1.30618,-999,-999,-999,0,1,11.793685,1,0.434916,0.0,...,0.0086,0.0084,0.0453,45.295311,97,0.572526,2.692502,25,50,13449
4,0.969101,-999,-999,-999,0,1,113.548508,1,1.788384,0.0,...,0.0115,0.011,0.0773,33.635342,71,0.101532,4.014179,24,86,1379


#### Add the signal column (class labels from 0 to 5) and weights (to balance the data)

In [9]:
from utils import compute_labels_and_weights, compute_charges, names_labels_correspondence, labels_names_correspondence
from utils import plot_hist_features, roc_auc_score_one_vs_all, compute_cum_sum, convert_DLL_to_LL

In [10]:
# Derive the 'Signal' and 'Weight' columns from MCParticleType, in the same
# way for the training and the test frame.
for frame in (data_train, data_test):
    frame['Signal'], frame['Weight'] = compute_labels_and_weights(frame.MCParticleType.values)

In [11]:
for family_name, family_features in feature_families.items():
    print "{:10}".format(family_name), len(family_features), '\t', family_features

muon       5 	[u'MuonNShared', u'MuonIsLooseMuon', u'MuonIsMuon', u'MuonBkgLL', u'MuonMuLL']
track      11 	[u'TrackFitVeloChi2', u'TrackFitVeloNDoF', u'TrackFitMatchChi2', u'TrackGhostProbability', u'TrackP', u'TrackChi2PerDof', u'TrackFitTChi2', u'TrackPt', u'TrackNumDof', u'TrackFitTNDoF', u'TrackDOCA']
acceptance 6 	[u'InAccSpd', u'InAccPrs', u'InAccBrem', u'InAccEcal', u'InAccHcal', u'InAccMuon']
DLL        5 	[u'CombDLLmu', u'CombDLLpi', u'CombDLLp', u'CombDLLe', u'CombDLLk']
RICH       13 	[u'RichAboveMuThres', u'RichAboveElThres', u'RichAbovePiThres', u'RichAboveKaThres', u'RichAbovePrThres', u'RichUsedR1Gas', u'RichUsedR2Gas', u'RichDLLbt', u'RichDLLpi', u'RichDLLe', u'RichDLLp', u'RichDLLmu', u'RichDLLk']
CALO       20 	[u'CaloBremMatch', u'CaloElectronMatch', u'CaloTrMatch', u'CaloTrajectoryL', u'CaloChargedSpd', u'CaloChargedPrs', u'CaloChargedEcal', u'CaloNeutralSpd', u'CaloNeutralPrs', u'CaloNeutralEcal', u'CaloSpdE', u'CaloPrsE', u'CaloEcalE', u'CaloHcalE', u'EcalPIDmu',

In [12]:
def save_predictions(predictions, filename):
    """
    Pickle per-class prediction columns to `filename`.

    The stored object is a dict mapping the class index (0..5) to the
    corresponding column of `predictions`; this is the layout referred to in
    the original note as "the format appropriate for Tatiana".

    :param predictions: 2-D array-like indexable as predictions[:, i],
        with at least 6 columns
    :param filename: path of the pickle file to create (overwritten if present)
    """
    # cPickle only exists on Python 2; fall back to the stdlib pickle elsewhere.
    try:
        import cPickle as pickle
    except ImportError:
        import pickle
    saved_predictions = {
        i: predictions[:, i] for i in range(6)
    }
    # Pickle streams are binary data: text mode ('w') corrupts them on
    # platforms that translate newlines and is rejected outright on Python 3.
    with open(filename, 'wb') as f:
        pickle.dump(saved_predictions, f)

## Function to print quality

In [13]:
from utils import roc_auc_score_one_vs_all

## One layer MLP hep_ml

In [None]:
%%time
# hep_ml network configured with layers=[20], wrapped so that it only sees
# the columns listed in train_features.
one_layer_mlp = MLPMultiClassifier(layers=[20], epochs=1000, scaler='iron')
mlp_clf = SklearnClassifier(one_layer_mlp, features=train_features)
mlp_clf.fit(data_train, data_train.Signal)

In [None]:
# Score the fitted mlp_clf on the test sample, using the per-row test weights.
roc_auc_score_one_vs_all(data_test.Signal, mlp_clf.predict_proba(data_test), data_test.Weight)

In [None]:
# Same score on the training sample, for comparison with the test score above.
roc_auc_score_one_vs_all(data_train.Signal, mlp_clf.predict_proba(data_train), data_train.Weight)

## Checking simple MLP

In [None]:
%%time
# Same setup as the one-layer model but with layers=[40, 30, 20].
# Note that this rebinds mlp_clf, replacing the previous classifier.
three_layer_mlp = MLPMultiClassifier(layers=[40, 30, 20], epochs=1000, scaler='iron')
mlp_clf = SklearnClassifier(three_layer_mlp, features=train_features)
mlp_clf.fit(data_train, data_train.Signal)

In [None]:
# Score the current mlp_clf (layers=[40, 30, 20]) on the test sample.
roc_auc_score_one_vs_all(data_test.Signal, mlp_clf.predict_proba(data_test), data_test.Weight)

In [None]:
# Same score on the training sample, for comparison with the test score above.
roc_auc_score_one_vs_all(data_train.Signal, mlp_clf.predict_proba(data_train), data_train.Weight)

## Defining one more network over hep_ml.nnet

In [None]:
from hep_ml.nnet import MLPMultiClassifier, MLPBase

In [None]:
import theano.tensor as T
from hep_ml.nnet import theano, floatX
class FamiliesMultiClassifier(MLPMultiClassifier):
    """
    Network whose first layer is split by subsystem (feature family).

    Each subsystem's slice of the input goes through its own weight matrix
    (W1_i) producing 10 outputs; the results are concatenated, shifted by a
    trainable vector and passed through tanh. Two further tanh layers
    (W2, W3) and a final linear layer (W4) follow.

    `layers` is expected to be [subsystem_sizes, size2, size3], where
    subsystem_sizes is a list with the number of features of each subsystem,
    given in the same order as the input columns.
    """
    def prepare(self):
        """
        Create the trainable parameters and return the activation function.

        :return: function mapping the input matrix x to the output of the
            last (linear) layer
        """
        # debug output of the resolved layer specification
        print self.layers_
        n_input, subsystem_sizes, size2, size3, n_output = self.layers_
        
        # column offsets: subsystem i occupies columns offsets[i]:offsets[i + 1]
        subsystem_offsets = numpy.cumsum([0] + list(subsystem_sizes))
        # The input has exactly one column more than the subsystems cover; that
        # last column is never read by my_activation below.
        # NOTE(review): presumably the constant column appended by the base class -- confirm.
        assert n_input == sum(subsystem_sizes) + 1
        W1s = []
        # number of outputs produced for every subsystem
        subsystem_output = 10
        hidden1_size = subsystem_output * len(subsystem_sizes)
        for i, subsystem_size in enumerate(subsystem_sizes):
            W1s.append(self._create_matrix_parameter('W1_{}'.format(i), subsystem_size, subsystem_output))
        W2 = self._create_matrix_parameter('W2', hidden1_size, size2)
        W3 = self._create_matrix_parameter('W3', size2, size3)
        W4 = self._create_matrix_parameter('W4', size3, n_output)

        # trainable shift of the first hidden layer, drawn from a normal
        # distribution scaled by 0.01 and registered in self.parameters
        shift = theano.shared(value=self.random_state_.normal(size=[hidden1_size]).astype(floatX) * 0.01, name='shift')
        self.parameters['shift'] = shift

        def my_activation(x):
            # per-subsystem linear maps applied to the matching column slices
            outputs = []
            for i, W1_ in enumerate(W1s):
                output = T.dot(x[:, subsystem_offsets[i]:subsystem_offsets[i + 1]], W1_)
                outputs.append(output)

            layer1 = T.tanh(T.concatenate(outputs, axis=1) + shift)
            layer2 = T.tanh(T.dot(layer1, W2))
            layer3 = T.tanh(T.dot(layer2, W3))
            # the last layer is linear: no non-linearity is applied here
            layer4 = T.dot(layer3, W4)
            return layer4    
                
        return my_activation

In [None]:
%%time
# First entry: number of features in each family (dict order, i.e. the same
# order in which train_features was concatenated); then two hidden sizes.
family_sizes = [len(x) for x in feature_families.values()]
layers = [family_sizes, 30, 20]
families_mlp = FamiliesMultiClassifier(layers=layers, epochs=1000, scaler='iron')
fmc_clf = SklearnClassifier(families_mlp, features=train_features)
fmc_clf.fit(data_train, data_train.Signal)

In [None]:
# Score the families classifier on the test sample, using the test weights.
roc_auc_score_one_vs_all(data_test.Signal, fmc_clf.predict_proba(data_test), data_test.Weight)

In [None]:
# Same score on the training sample, for comparison with the test score above.
roc_auc_score_one_vs_all(data_train.Signal, fmc_clf.predict_proba(data_train), data_train.Weight)

# Keras

### Preprocessing

In [16]:
from hep_ml.preprocessing import IronTransformer

# Training targets for all Keras models below.
trainY = data_train.Signal.values

# Fit the scaler on the training sample only ...
iron_scaler = IronTransformer(symmetrize=True)
iron_scaler.fit(data_train[train_features])

# ... and apply it to both samples, stored as float32 matrices.
ironed_trainX, ironed_testX = [
    iron_scaler.transform(frame[train_features]).values.astype('float32')
    for frame in (data_train, data_test)
]

In [17]:
from sklearn.preprocessing import StandardScaler
# Standard scaling, fitted on the training sample, used as the baseline
# preprocessing for the first Keras model.
st_scaler = StandardScaler()
# fit() is the last expression of the cell, so the fitted scaler is displayed.
st_scaler.fit(data_train[train_features])

StandardScaler(copy=True, with_mean=True, with_std=True)

In [18]:
from keras.layers.core import Dense, Dropout, Activation
from keras.layers import BatchNormalization
from keras.models import Sequential
from keras.optimizers import SGD, Adam, RMSprop
from keras.utils import np_utils

Using Theano backend.


## Single layer with StandardScaler

In [19]:
def prepare_1layer_model(input_dim, hidden_dim):
    """
    Build and compile a classifier with one tanh hidden layer and a softmax output.

    :param input_dim: number of input features
    :param hidden_dim: number of units in the hidden layer
    :return: compiled Sequential model (categorical cross-entropy loss, Adam optimizer)

    The number of output classes is the number of distinct values in the
    notebook-level `trainY`, which must be defined before calling this.
    """
    n_classes = len(set(trainY))
    stack = (
        Dense(hidden_dim, input_dim=input_dim),
        Activation('tanh'),
        Dense(n_classes),
        Activation('softmax'),
    )

    model = Sequential()
    for layer in stack:
        model.add(layer)

    model.compile(loss='categorical_crossentropy', optimizer=Adam())
    return model

In [20]:
# One-hidden-layer model (150 units) that will be trained on StandardScaler-ed inputs.
keras_no_iron_clf = prepare_1layer_model(input_dim=len(train_features), hidden_dim=150)

In [21]:
# Mode 'w' starts the log file from scratch; later cells append to it.
with open('log_nn_base', 'w') as log_file:
    log_file.write("Simple NN, standard scaler\n")

In [22]:
%%time

# The scaled inputs and the one-hot targets do not change between rounds, so
# compute them once instead of on each of the 15 iterations.
st_trainX = st_scaler.transform(data_train[train_features])
st_testX = st_scaler.transform(data_test[train_features])
trainY_onehot = np_utils.to_categorical(trainY)

# 15 rounds of 5 epochs each; the test score is appended to the log after every round.
for epochs in range(15):
    keras_no_iron_clf.fit(st_trainX, trainY_onehot,
                          verbose=0, nb_epoch=5, batch_size=256)
    p = keras_no_iron_clf.predict_proba(st_testX, verbose=False)
    auc = roc_auc_score_one_vs_all(data_test.Signal, p, data_test.Weight).values
    with open('log_nn_base', 'a') as f:
        f.write("{}\n".format(auc))

# The scaled copies are only needed for this model; release the memory.
del st_trainX, st_testX



CPU times: user 4h 28min 31s, sys: 12h 32min 45s, total: 17h 1min 16s
Wall time: 2h 33min 21s


## Single layer with IronTransformer

In [23]:
# Same architecture as keras_no_iron_clf; this one is trained on the iron-scaled inputs.
keras_onelayer_clf = prepare_1layer_model(input_dim=len(train_features), hidden_dim=150)

In [24]:
# Append a section header to the shared log before this model's scores.
with open('log_nn_base', 'a') as log_file:
    log_file.write("Simple NN, iron scaler\n")

In [25]:
%%time
# 15 rounds of 5 epochs each; the test score is appended to the log after every round.
for training_round in range(15):
    keras_onelayer_clf.fit(
        ironed_trainX, np_utils.to_categorical(trainY),
        verbose=0, nb_epoch=5, batch_size=256,
    )
    test_probs = keras_onelayer_clf.predict_proba(ironed_testX, verbose=False)
    auc = roc_auc_score_one_vs_all(data_test.Signal, test_probs, data_test.Weight).values
    with open('log_nn_base', 'a') as log_file:
        log_file.write("{}\n".format(auc))

CPU times: user 3h 3min 35s, sys: 7h 43min 11s, total: 10h 46min 47s
Wall time: 1h 37min


In [26]:
# Predict on the full test sample (scaled with the fitted iron_scaler) and store the result.
onelayer_test_probs = keras_onelayer_clf.predict_proba(
    iron_scaler.transform(data_test_full[train_features]).values, verbose=False)
save_predictions(onelayer_test_probs, filename='./models/keras_onelayer_probs.pkl')

## Deep learning

In [27]:
def prepare_deep_model(input_dim):
    """
    Build and compile a network with three ReLU hidden layers and a softmax output.

    Hidden layers have 300, 300 and 400 units with dropout rates 0.1, 0.3 and
    0.5 respectively; the second hidden layer is followed by batch normalization.

    :param input_dim: number of input features
    :return: compiled Sequential model (categorical cross-entropy loss, Adam optimizer)

    The number of output classes is the number of distinct values in the
    notebook-level `trainY`, which must be defined before calling this.
    """
    n_classes = len(set(trainY))
    stack = [
        # first hidden layer
        Dense(300, input_dim=input_dim), Dropout(0.1), Activation('relu'),
        # second hidden layer
        Dense(300), BatchNormalization(), Dropout(0.3), Activation('relu'),
        # third hidden layer
        Dense(400), Dropout(0.5), Activation('relu'),
        # class probabilities
        Dense(n_classes), Activation('softmax'),
    ]

    model = Sequential()
    for layer in stack:
        model.add(layer)

    model.compile(loss='categorical_crossentropy', optimizer=Adam())
    return model

In [28]:
# Deep model over the base training features only (no PV features).
keras_dl_clf = prepare_deep_model(len(train_features))

In [None]:
# Append a section header to the shared log before this model's scores.
with open('log_nn_base', 'a') as log_file:
    log_file.write("Deep learning\n")

In [None]:
%%time
# 5 rounds of 10 epochs each; the test score is appended to the log after every round.
for training_round in range(5):
    keras_dl_clf.fit(
        ironed_trainX, np_utils.to_categorical(trainY),
        verbose=0, nb_epoch=10, batch_size=256,
    )
    test_probs = keras_dl_clf.predict_proba(ironed_testX, verbose=False)
    val = roc_auc_score_one_vs_all(data_test.Signal, test_probs, data_test.Weight).values
    with open('log_nn_base', 'a') as log_file:
        log_file.write('{}\n'.format(val))

In [None]:
# Predict on the full test sample (scaled with the fitted iron_scaler) and store the result.
dl_test_probs = keras_dl_clf.predict_proba(
    iron_scaler.transform(data_test_full[train_features]).values, verbose=False)
save_predictions(dl_test_probs, filename='./models/keras_dl_probs.pkl')

### + pv features

In [None]:
from hep_ml.preprocessing import IronTransformer

# train_features is a numpy array, so `train_features + pv_features` would be
# an element-wise addition rather than a concatenation of the two name lists.
# Build the combined column list explicitly.
features_with_pv = list(train_features) + list(pv_features)

iron_scaler_pv = IronTransformer(symmetrize=True)
iron_scaler_pv.fit(data_train[features_with_pv])

# Transform with the scaler fitted on the extended feature set; the base
# iron_scaler was fitted without the PV columns and cannot be used here.
ironed_trainX_pv = iron_scaler_pv.transform(data_train[features_with_pv]).values.astype('float32')
ironed_testX_pv = iron_scaler_pv.transform(data_test[features_with_pv]).values.astype('float32')

In [None]:
# Input width is the number of base training features plus the number of PV features.
keras_dl_clf_pv = prepare_deep_model(len(train_features) + len(pv_features))

In [None]:
# The PV experiment has its own log file; mode 'w' starts it from scratch.
with open('log_nn_pv', 'w') as log_file:
    log_file.write('Deep nn, + pv\n')

In [None]:
%%time
# 5 rounds of 10 epochs each; the test score is appended to the PV log after every round.
for training_round in range(5):
    keras_dl_clf_pv.fit(
        ironed_trainX_pv, np_utils.to_categorical(trainY),
        verbose=0, nb_epoch=10, batch_size=256,
    )
    test_probs = keras_dl_clf_pv.predict_proba(ironed_testX_pv, verbose=False)
    val = roc_auc_score_one_vs_all(data_test.Signal, test_probs, data_test.Weight).values
    with open('log_nn_pv', 'a') as log_file:
        log_file.write('{}\n'.format(val))

In [None]:
# The PV model must be fed inputs scaled by iron_scaler_pv (fitted on the base
# and PV columns together); the base iron_scaler was fitted without the PV
# columns. train_features is a numpy array, so the combined column list is
# built explicitly instead of with `train_features + pv_features`.
features_with_pv = list(train_features) + list(pv_features)
pv_test_probs = keras_dl_clf_pv.predict_proba(
    iron_scaler_pv.transform(data_test_full[features_with_pv]).values, verbose=False)
save_predictions(pv_test_probs, filename='./models/keras_dl_probs_pv.pkl')

## Keras with sublayers

In [None]:
from keras.layers import Layer
from keras import backend as K

class SublayersLayer(Layer):
    """
    Layer that applies a separate linear map to each contiguous slice of the input.

    `sizes` holds the slice boundaries: slice i covers input columns
    sizes[i]:sizes[i + 1], so there are len(sizes) - 1 slices. Every slice is
    mapped to `output_dim` outputs; the outputs are concatenated and a shared
    bias vector is added.
    """
    def __init__(self, sizes, output_dim, **kwargs):
        """
        :param sizes: increasing sequence of column boundaries (see class docstring)
        :param output_dim: number of outputs produced for each slice
        """
        self.output_dim = output_dim
        self.sizes = sizes
        super(SublayersLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create one weight matrix per slice, followed by the bias vector."""
        # NOTE(review): input_dim is assigned but never used below.
        input_dim = input_shape[1]
        self.trainable_weights = []
        for i in range(len(self.sizes) - 1):
            matrix_size = (self.sizes[i+1] - self.sizes[i], self.output_dim)
            # NOTE(review): numpy.random.random draws from [0, 1), so all initial
            # weights are non-negative -- confirm this initialization is intended.
            initial_weight_value = numpy.random.random(size=matrix_size)
            W = K.variable(initial_weight_value)
            self.trainable_weights.append(W)
            # print matrix_size
        # the bias is stored last; call() relies on this position
        self.trainable_weights.append(K.zeros((self.output_dim * (len(self.sizes) - 1),)))

    def call(self, x, mask=None):
        """Project every slice of x with its own matrix, concatenate, add the bias."""
        sublayers_outputs = []
        # one matrix per slice plus the trailing bias vector
        assert len(self.trainable_weights) == (len(self.sizes) - 1) + 1
        for i in range(len(self.sizes) - 1):
            w = self.trainable_weights[i]
            sublayers_outputs.append(K.dot(x[:, self.sizes[i]:self.sizes[i+1]], w))
   
        return K.concatenate(sublayers_outputs, axis=1) + self.trainable_weights[-1]

    def get_output_shape_for(self, input_shape):
        """Output has output_dim columns for each slice; the first dimension is kept."""
        return (input_shape[0], self.output_dim * (len(self.sizes) - 1))

In [None]:
from theano import tensor
from keras.layers import InputLayer

In [None]:
def prepare_sublayers(input_dim, feature_families):
    model = Sequential()
    model.add(Activation('linear', input_shape=[input_dim]))

    sublayer_sizes = numpy.cumsum([0] + map(len, feature_families.values()))
    print sublayer_sizes
    model.add(SublayersLayer(sizes=sublayer_sizes, output_dim=50))
    model.add(Dropout(0.2))
    model.add(Activation('relu'))

    model.add(Dense(300))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Activation('relu'))

    model.add(Dense(400))
    model.add(Dropout(0.5))
    model.add(Activation('relu'))

    n_classes = len(set(trainY))

    model.add(Dense(n_classes))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy', optimizer=Adam())
    return model

In [None]:
# feature_families must be in the same state as when train_features was built,
# because the sublayer boundaries are derived from the family sizes in dict order.
keras_sublayers_clf = prepare_sublayers(len(train_features), feature_families=feature_families)

In [None]:
# Append a section header to the shared log before this model's scores.
with open('log_nn_base', 'a') as log_file:
    log_file.write("Sublayers\n")

In [None]:
%%time
# 5 rounds of 10 epochs each; the test score is appended to the log after every round.
for training_round in range(5):
    keras_sublayers_clf.fit(
        ironed_trainX, np_utils.to_categorical(trainY),
        verbose=0, nb_epoch=10, batch_size=256,
    )
    test_probs = keras_sublayers_clf.predict_proba(ironed_testX, verbose=False)
    sublayers_auc = roc_auc_score_one_vs_all(data_test.Signal, test_probs, data_test.Weight).values
    with open('log_nn_base', 'a') as log_file:
        log_file.write('{}\n'.format(sublayers_auc))

In [None]:
# Predict on the full test sample (scaled with the fitted iron_scaler) and store the result.
sublayers_test_probs = keras_sublayers_clf.predict_proba(
    iron_scaler.transform(data_test_full[train_features]).values, verbose=False)
save_predictions(sublayers_test_probs, filename='./models/keras_sublayers_probs.pkl')

## Keras for staged training

In [None]:
def generate_small_model(input_dim, hidden_layer):
    """
    Build and compile a small one-hidden-layer classifier for a single feature family.

    :param input_dim: number of input features
    :param hidden_layer: number of units in the hidden layer
    :return: compiled Sequential model (categorical cross-entropy loss, Adam optimizer)

    The number of output classes is the number of distinct values in the
    notebook-level `trainY`, which must be defined before calling this.
    """
    n_classes = len(set(trainY))
    stack = [
        Dense(hidden_layer, input_dim=input_dim),
        # added dropout in order not to care about folding :)
        Dropout(p=0.3),
        Activation('tanh'),
        Dense(n_classes),
        Activation('softmax'),
    ]

    small_model = Sequential()
    for layer in stack:
        small_model.add(layer)

    small_model.compile(loss='categorical_crossentropy', optimizer=Adam())
    return small_model

In [None]:
# Append a section header to the shared log for the stacking experiment.
with open('log_nn_base', 'a') as log_file:
    log_file.write("Stacking\n")

In [None]:
%%time
family_models = {}
for family_name, family_features in feature_families.items():
    family_model = generate_small_model(len(family_features), hidden_layer=50)
    family_models[family_name] = family_model
    family_model.fit(pandas.DataFrame(ironed_trainX, columns=train_features)[family_features].values, 
                     np_utils.to_categorical(trainY), 
                     verbose=0, nb_epoch=20, batch_size=256)
    print 'done'    

In [None]:
for family_name, family_features in feature_families.items():
    family_model = family_models[family_name]
    test_pred = family_model.predict_proba(pandas.DataFrame(ironed_testX, columns=train_features)[family_features].values, verbose=False)
    print roc_auc_score_one_vs_all(data_test.Signal, test_pred, data_test.Weight).values

In [None]:
def collect_features(data):
    """
    Build the input of the combiner model for `data`.

    The result holds the iron-scaled training features followed by the class
    probabilities predicted by every per-family model (families in the
    iteration order of `feature_families`).

    :param data: DataFrame containing all columns listed in `train_features`
    :return: DataFrame with one row per row of `data`

    Uses the notebook-level `iron_scaler`, `train_features`,
    `feature_families` and `family_models`.
    """
    ironed_data = iron_scaler.transform(data[train_features])
    joint_data = [ironed_data]
    for family_name, family_features in feature_families.items():
        family_model = family_models[family_name]
        pred = family_model.predict_proba(ironed_data[family_features].values, verbose=False)
        # pandas.concat(axis=1) aligns rows by index label. Give the predictions
        # the index of the scaled frame so rows stay paired positionally even
        # when that index is not the default 0..n-1 range.
        joint_data.append(pandas.DataFrame(pred, index=ironed_data.index))
    return pandas.concat(joint_data, axis=1)

In [None]:
# Stacked inputs (scaled features + per-family predictions) for both samples.
train_full, test_full = map(collect_features, (data_train, data_test))

In [None]:
def prepare_combiner_model(input_dim):
    """
    Build and compile the network that combines features with per-family predictions.

    Two ReLU hidden layers (300 and 400 units, dropout 0.3 and 0.5, batch
    normalization after the second dense layer) followed by a softmax output.

    :param input_dim: number of input columns
    :return: compiled Sequential model (categorical cross-entropy loss, Adam optimizer)

    The number of output classes is the number of distinct values in the
    notebook-level `trainY`, which must be defined before calling this.
    """
    n_classes = len(set(trainY))
    stack = [
        # first hidden layer
        Dense(300, input_dim=input_dim), Dropout(0.3), Activation('relu'),
        # second hidden layer
        Dense(400), BatchNormalization(), Dropout(0.5), Activation('relu'),
        # class probabilities
        Dense(n_classes), Activation('softmax'),
    ]

    model = Sequential()
    for layer in stack:
        model.add(layer)

    model.compile(loss='categorical_crossentropy', optimizer=Adam())
    return model

In [None]:
# Input width is the number of columns of the stacked training frame.
keras_combiner_clf = prepare_combiner_model(train_full.shape[1])

In [None]:
%%time 
for epochs in range(5):
    keras_combiner_clf.fit(train_full.values, np_utils.to_categorical(trainY), verbose=0, nb_epoch=5, batch_size=256)
    p = keras_combiner_clf.predict_proba(test_full.values, verbose=False)
    print roc_auc_score_one_vs_all(data_test.Signal, p, data_test.Weight).values

In [None]:
# Rebuild the stacked inputs for the full test sample, predict, and store the result.
combiner_test_probs = keras_combiner_clf.predict_proba(
    collect_features(data_test_full).values, verbose=False)
save_predictions(combiner_test_probs, filename='./models/keras_subdetectors_probs.pkl')