In [11]:
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.preprocessing import MinMaxScaler,StandardScaler
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier
import sklearn

In [2]:
# Read both HDF5 samples into DataFrames.
# prev_store holds the older (Oct 27) file, new_store the newer (Nov 7) file.
prev_store = pd.read_hdf("singlepi_e100GeV_pu200_oct27.h5")
new_store = pd.read_hdf("singlepi_e100GeV_pu200Nov7.h5")

In [3]:
# Prepare the older Oct 27 sample as a separate test set.
# Binary target: 0 when purity <= 1, 1 otherwise.
# NOTE(review): this overwrites prev_store['purity'] in place, so re-running this
# cell without re-running the load cell maps every label to 0 (all values are
# already <= 1 after the first pass).
prev_store['purity'] = prev_store['purity'].apply(lambda x: 0 if x <= 1 else 1)
# Use `columns=` rather than a positional axis: pandas >= 2.0 makes every
# argument of DataFrame.drop except `labels` keyword-only, so drop(cols, 1) raises.
XOct27Test = prev_store.drop(columns=['purity', 'event', 'trackster', 'trckType'])
# Independent copy of the label column, as the original [[...]].iloc[:, 0] produced.
YOct27Test = prev_store['purity'].copy()

In [16]:
# Working frame for the Nov 7 sample: a copy of new_store without 'trckType'.
# `columns=` replaces the positional axis argument, which pandas >= 2.0 rejects.
df = new_store.drop(columns=['trckType'])
# Binary target: 0 when purity <= 1, 1 otherwise. df is rebuilt from new_store
# on every run, so this cell can be re-executed safely.
df['purity'] = df['purity'].apply(lambda x: 0 if x <= 1 else 1)


In [8]:
# 90/10 train/test split of df; random_state is the seed that keeps it reproducible.
trainDF = df.sample(frac=0.9, random_state=200)
# The test set is every row whose index label was not sampled for training.
# NOTE(review): assumes df's index labels are unique -- confirm.
testDF = df.drop(index=trainDF.index)

# The label and the 'event'/'trackster' columns are excluded from the features.
# `columns=` replaces the positional axis argument, which pandas >= 2.0 rejects.
non_feature_cols = ['purity', 'event', 'trackster']
x0Train = trainDF.drop(columns=non_feature_cols)
x0Test = testDF.drop(columns=non_feature_cols)

# Standardize using statistics fitted on the training rows only, then apply
# the same transform to the test rows. Both results are NumPy arrays.
sc = StandardScaler()
x0Train = sc.fit_transform(x0Train)
x0Test = sc.transform(x0Test)

# Label vectors as independent Series.
y0Train = trainDF['purity'].copy()
y0Test = testDF['purity'].copy()

In [13]:
# Fit an MLP with the L-BFGS solver on the standardized training features,
# then score its predictions on the held-out test split.
clf0 = MLPClassifier(solver='lbfgs', random_state=1234)
clf0.fit(x0Train, y0Train)
y0TestPred = clf0.predict(x0Test)

print("Testing confusion_matrix")
print(confusion_matrix(y0Test, y0TestPred))

# Each entry is (printed label, metric function); order matches the printed report.
for label, metric in (
    ("Test accuracy", sklearn.metrics.accuracy_score),
    ("Test Percision", sklearn.metrics.precision_score),
    ("Test recall", sklearn.metrics.recall_score),
    ("Test F1 score", sklearn.metrics.f1_score),
):
    print(label)
    print(metric(y0Test, y0TestPred))

Testing confusion_matrix
[[1939   91]
 [  99  459]]
Test accuracy
0.9265842349304482
Test Percision
0.8345454545454546
Test recall
0.8225806451612904
Test F1 score
0.8285198555956679




In [15]:
# Same experiment with the SGD solver. This rebinds clf0 and y0TestPred,
# replacing whatever a previously executed cell stored under those names.
clf0 = MLPClassifier(solver='sgd', random_state=1234)
clf0.fit(x0Train, y0Train)
y0TestPred = clf0.predict(x0Test)

print("Testing confusion_matrix", confusion_matrix(y0Test, y0TestPred), sep="\n")

# Printed label -> metric function, in report order.
report = [
    ("Test accuracy", sklearn.metrics.accuracy_score),
    ("Test Percision", sklearn.metrics.precision_score),
    ("Test recall", sklearn.metrics.recall_score),
    ("Test F1 score", sklearn.metrics.f1_score),
]
for label, metric in report:
    # One line for the label, one for the value.
    print(label, metric(y0Test, y0TestPred), sep="\n")

Testing confusion_matrix
[[1937   93]
 [ 134  424]]
Test accuracy
0.9122874806800618
Test Percision
0.8201160541586073
Test recall
0.7598566308243727
Test F1 score
0.7888372093023255


In [20]:
# Exhaustive 5-fold cross-validated search over MLP hyper-parameters on all of df.
# `columns=` replaces the positional axis argument, which pandas >= 2.0 rejects.
X = df.drop(columns=['purity', 'event', 'trackster'])
y = df['purity'].copy()
# NOTE(review): X is passed unscaled here, whereas the single-model cells train on
# StandardScaler output -- consider a Pipeline(StandardScaler, MLPClassifier) so
# the scaler is fitted inside each CV fold.

parameter_space = {
    'hidden_layer_sizes': [(50,50,50), (50,100,50), (100,), (50,)],
    'activation': ['tanh', 'relu'],
    'solver': ['lbfgs', 'adam'],
    'alpha': [0.0001,0.001,0.01,0.05],
    # NOTE(review): scikit-learn documents `learning_rate` as only used when
    # solver='sgd'; with 'lbfgs'/'adam' these two values train identical models
    # and double the search time.
    'learning_rate': ['constant','adaptive'],
    'max_iter':[200,400,800],
    }
# random_state makes the search reproducible (same seed as the single-model cells);
# n_jobs=-1 spreads the 384 candidates x 5 folds over all available cores.
grid_search = GridSearchCV(
    MLPClassifier(random_state=1234),
    parameter_space,
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X, y)
print(grid_search.best_params_)



{'activation': 'relu', 'alpha': 0.05, 'hidden_layer_sizes': (50, 50, 50), 'learning_rate': 'constant', 'max_iter': 200, 'solver': 'adam'}


In [21]:
# Print every candidate from the grid search as
# "mean (+/- 2*std) for {params}", in the order GridSearchCV stored them.
cv_results = grid_search.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
for idx, params in enumerate(cv_results['params']):
    mean, std = means[idx], stds[idx]
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

0.893 (+/-0.028) for {'activation': 'tanh', 'alpha': 0.0001, 'hidden_layer_sizes': (50, 50, 50), 'learning_rate': 'constant', 'max_iter': 200, 'solver': 'lbfgs'}
0.902 (+/-0.022) for {'activation': 'tanh', 'alpha': 0.0001, 'hidden_layer_sizes': (50, 50, 50), 'learning_rate': 'constant', 'max_iter': 200, 'solver': 'adam'}
0.894 (+/-0.027) for {'activation': 'tanh', 'alpha': 0.0001, 'hidden_layer_sizes': (50, 50, 50), 'learning_rate': 'constant', 'max_iter': 400, 'solver': 'lbfgs'}
0.895 (+/-0.014) for {'activation': 'tanh', 'alpha': 0.0001, 'hidden_layer_sizes': (50, 50, 50), 'learning_rate': 'constant', 'max_iter': 400, 'solver': 'adam'}
0.897 (+/-0.027) for {'activation': 'tanh', 'alpha': 0.0001, 'hidden_layer_sizes': (50, 50, 50), 'learning_rate': 'constant', 'max_iter': 800, 'solver': 'lbfgs'}
0.895 (+/-0.016) for {'activation': 'tanh', 'alpha': 0.0001, 'hidden_layer_sizes': (50, 50, 50), 'learning_rate': 'constant', 'max_iter': 800, 'solver': 'adam'}
0.889 (+/-0.029) for {'activati