## LINCS MOA classification for LINCS curated using Feed Forward Neural Network

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing  import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score

In [2]:
# Load the GSE92742 HDF5 file into a pandas object.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs on a host where this file exists at this location.
file_prostate = pd.read_hdf(
    "/opt/raid10/genomics/rashid/GCN/data/GSE92742_fully_restricted.hdf"
)

In [3]:
# Move index levels 0 through 6 out of the index and into ordinary columns.
# presumably these seven levels are the metadata columns dropped in the next
# cell (leaving a single-level index) -- confirm against the HDF schema.
df_prostate = file_prostate.reset_index(level=list(range(7)))

In [4]:
# Columns that describe the sample rather than measure expression; they are
# excluded from the feature matrix.
metadata_columns = ["pert_id", "pert_name", "cell_id", "primary_site", "subtype", "moa", "Fold"]

# Target vector: the raw "moa" value of every row, as a NumPy array.
prostate_y = df_prostate["moa"].values

# Feature matrix: every remaining column (per the original author's note, the
# values for 978 genes). The drop is applied to a temporary frame instead of
# being assigned back to `df_prostate`, so this cell can be re-run safely;
# overwriting `df_prostate` made a second run fail with KeyError on "moa".
prostate_X = df_prostate.drop(columns=metadata_columns).values


In [5]:
# Integer-encode the MOA labels: `labels` holds one integer code per row and
# `uniques` holds the distinct original values, indexed by code.
# NOTE: `prostate_y` is overwritten with the codes, so re-running this cell
# would factorize the integers and `uniques` would no longer hold MOA names.
labels, uniques = pd.factorize(prostate_y)
prostate_y = labels

In [6]:
# Standardize the feature matrix with scikit-learn's StandardScaler
# (default settings), replacing `prostate_X` with the scaled values.
# NOTE(review): the scaler is fitted on the full matrix before the train/test
# split in the next cell, so test-set statistics influence the preprocessing.
# Fitting on the training split only would avoid that leakage, but would also
# change the reported numbers -- confirm which protocol the benchmark expects.
sc = StandardScaler()
prostate_X = sc.fit_transform(prostate_X)

In [7]:
# SPLIT DATA
# Hold out 20% of the rows as the test set; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    prostate_X,
    prostate_y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Untuned feed-forward network: one hidden layer of 997 units, all other
# hyperparameters left at the scikit-learn defaults.
# - hidden_layer_sizes is written as a one-element tuple; `(997)` without the
#   trailing comma is just the integer 997, not a tuple.
# - max_iter is the upper bound on training epochs (full passes over the
#   training data); training may stop earlier on convergence.
# - random_state pins weight initialization and batch shuffling so the reported
#   metrics can be reproduced; the seed matches the one used for the split.
mlpc = MLPClassifier(hidden_layer_sizes=(997,), max_iter=350, random_state=42)
mlpc.fit(X_train, y_train)
pred_mlpc = mlpc.predict(X_test)  # author's note: fit + predict take around 5 minutes

In [9]:
# METRICS for the untuned MLP on the held-out test split
print(confusion_matrix(y_test, pred_mlpc))
print(classification_report(y_test, pred_mlpc))

# Overall accuracy of the MLP predictions (this is accuracy, not precision).
acc_mlpc = accuracy_score(y_test, pred_mlpc)
print(acc_mlpc)

# Author's note: results match the paper, where accuracy is 68.3 ± 0.60 and
# macro F1 is 50.4 ± 0.71. Took 4-5 minutes on a MacBook Pro (2.7 GHz Intel
# Core i5 processor, 16 GB 1867 MHz DDR3 memory, Intel Iris Graphics 6100
# 1536 MB).

[[  26   31   13 ...   12  113    5]
 [  17   70   25 ...   12  126    2]
 [  17   37   66 ...   27  112    5]
 ...
 [  14   23   15 ... 2285  243  425]
 [  84  139   87 ...  178 2587   32]
 [   5    5    2 ...  341   32  379]]
              precision    recall  f1-score   support

           0       0.06      0.04      0.05       583
           1       0.08      0.10      0.09       710
           2       0.12      0.10      0.11       634
           3       0.49      0.29      0.36       453
           4       0.05      0.03      0.04       205
           5       0.07      0.08      0.08       588
           6       0.18      0.16      0.17       212
           7       0.06      0.06      0.06       441
           8       0.81      0.73      0.77       859
           9       0.05      0.06      0.06       433
          10       0.04      0.05      0.04       337
          11       0.03      0.04      0.03       290
          12       0.07      0.06      0.07       487
          13   

In [10]:
# Persist the fitted, untuned model to disk.
# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed in
# 0.23, so import the standalone `joblib` package first and only fall back to
# the vendored copy on old installations. The later cell that saves the tuned
# model reuses this `joblib` name.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

joblib.dump(mlpc, 'FFNN_MOA_FullCurated_notTuned')

['FFNN_MOA_FullCurated_notTuned']

In [11]:
# Tuned feed-forward network: one hidden layer of 955 units trained with SGD,
# using the hyperparameter values below.
# - hidden_layer_sizes is written as a one-element tuple; `(955)` without the
#   trailing comma is just the integer 955.
# - max_iter caps training at 164 epochs; with early_stopping=True training can
#   stop sooner once the internal validation score stops improving.
# - random_state pins weight initialization, shuffling and the early-stopping
#   validation split so the reported metrics can be reproduced.
# - Per the scikit-learn documentation, beta_1, beta_2 and epsilon are only
#   used by solver='adam', and power_t only by learning_rate='invscaling'; they
#   are kept here unchanged but have no effect with this configuration.
mlpc_benchmark = MLPClassifier(
    hidden_layer_sizes=(955,),
    activation='relu',
    solver='sgd',
    alpha=1.6879174708893805,
    learning_rate='adaptive',
    learning_rate_init=0.10898026569061127,
    power_t=0.33021989249044037,
    momentum=0.86370430284520194,
    nesterovs_momentum=True,
    early_stopping=True,
    beta_1=0.919473939313036,
    beta_2=0.99919431656457547,
    epsilon=9.7045902021921481e-10,
    max_iter=164,
    random_state=42,
)
mlpc_benchmark.fit(X_train, y_train)
pred_mlpc_benchmark = mlpc_benchmark.predict(X_test)  # author's note: around 5 minutes



In [12]:
# METRICS for the tuned MLP on the held-out test split
print(confusion_matrix(y_test, pred_mlpc_benchmark))
print(classification_report(y_test, pred_mlpc_benchmark))

# Overall accuracy of the tuned MLP predictions (accuracy, not precision).
acc_mlpc_benchmark = accuracy_score(y_test, pred_mlpc_benchmark)
print(acc_mlpc_benchmark)

# Author's note: results match the paper, where accuracy is 68.3 ± 0.60 and
# macro F1 is 50.4 ± 0.71. Took 4-5 minutes on a MacBook Pro 2.7 GHz.
# NOTE(review): this note is identical to the one under the untuned model's
# metrics -- confirm it actually describes the tuned run.

[[   0    8    5 ...   35  410    2]
 [   0   19    6 ...   39  455    3]
 [   0    9   51 ...   51  409    0]
 ...
 [   0    4    1 ... 2833  508   47]
 [   0   11    5 ...  268 4688    7]
 [   0    0    0 ...  641   54  134]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       583
           1       0.09      0.03      0.04       710
           2       0.29      0.08      0.13       634
           3       0.57      0.32      0.41       453
           4       0.00      0.00      0.00       205
           5       0.11      0.00      0.01       588
           6       0.84      0.10      0.18       212
           7       0.08      0.00      0.00       441
           8       0.86      0.74      0.79       859
           9       0.00      0.00      0.00       433
          10       0.00      0.00      0.00       337
          11       0.00      0.00      0.00       290
          12       0.14      0.01      0.02       487
          13   

  'precision', 'predicted', average, warn_for)


In [13]:
# Persist the fitted, tuned model to disk. Relies on the `joblib` name
# imported in the earlier cell that saved the untuned model.
joblib.dump(mlpc_benchmark, 'FFNN_MOA_FullCurated_Tuned')

['FFNN_MOA_FullCurated_Tuned']