#0-Libraries

In [1]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import itertools
import pandas as pd
from sklearn.model_selection import train_test_split
import sklearn
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import confusion_matrix,classification_report,ConfusionMatrixDisplay,matthews_corrcoef
from sklearn.preprocessing import Normalizer
from sklearn.compose import ColumnTransformer
import tensorflow as tf
from tensorflow import keras
from keras import regularizers
from keras.backend import expand_dims
from keras.models import load_model
from keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras.models import Sequential
from keras.constraints import MaxNorm
from keras.layers import Activation, Dense, Conv1D, Flatten, MaxPooling1D, Dropout, BatchNormalization

METRICS = [
      keras.metrics.TruePositives(name='tp'),
      keras.metrics.FalsePositives(name='fp'),
      keras.metrics.TrueNegatives(name='tn'),
      keras.metrics.FalseNegatives(name='fn'), 
      keras.metrics.BinaryAccuracy(name='accuracy'),
      keras.metrics.Precision(name='precision'),
      keras.metrics.Recall(name='recall'),
      keras.metrics.AUC(name='auc'),
      keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
]

#1-Load Data and Model

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
model_from_driams_a = load_model('/content/drive/MyDrive/Colab Notebooks/DRIAMS/s_aureus_oxacillin_x.h5')
model_from_driams_a.summary()

Model: "Modelo_s_aureus_oxacillin"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Conv_1 (Conv1D)             (None, 5984, 64)          1152      
                                                                 
 batch_normalization_8 (Batc  (None, 5984, 64)         256       
 hNormalization)                                                 
                                                                 
 activation_8 (Activation)   (None, 5984, 64)          0         
                                                                 
 MaxPooling1D (MaxPooling1D)  (None, 2992, 64)         0         
                                                                 
 Conv_2 (Conv1D)             (None, 2984, 128)         73856     
                                                                 
 batch_normalization_9 (Batc  (None, 2984, 128)        512       
 hNormalization)                         

In [4]:
s_aureus_driams_b = pd.read_csv('/content/drive/MyDrive/New driams databae/Datasets Driams con espectro de masa/Driams_b/s_aureus_driams_b_bin3_2000_20000Da.csv')
s_aureus_driams_b.head()

Unnamed: 0,2000,2003,2006,2009,2012,2015,2018,2021,2024,2027,...,19991,19994,19997,code,species,Oxacillin,Clindamycin,Ceftriaxone,Ciprofloxacin,Fusidic acid
0,3894.285714,4288.428571,3771.714286,5134.714286,3902.142857,3062.571429,3026.0,3078.857143,3751.875,3582.142857,...,19.666667,18.0,16.2,379e3abe-c5b2-4f92-8f2f-0c9dd0a2c7b0,Staphylococcus aureus,0.0,0.0,,0.0,0.0
1,7327.714286,7367.0,9050.714286,9410.285714,8567.571429,9221.0,7407.857143,7006.857143,6694.142857,6969.714286,...,246.5,226.0,241.820755,eed06320-c82a-43a2-ad35-139e4e082044,Staphylococcus aureus,0.0,0.0,,0.0,0.0
2,5981.142857,6145.0,7768.75,6982.142857,6709.428571,6847.857143,5945.285714,5704.428571,6554.25,6829.0,...,178.0,186.0,189.74359,1b1e94b9-f2cc-42ec-91e1-e5c3bef4adc7,Staphylococcus aureus,0.0,1.0,,0.0,0.0
3,3470.142857,3477.0,2912.714286,3249.714286,2469.142857,2462.714286,2484.714286,2528.0,2918.375,2667.0,...,74.666667,90.5,96.5,e6cf028f-0960-4751-9ca6-d94f90e07ae6,Staphylococcus aureus,0.0,0.0,,0.0,0.0
4,1564.625,1984.857143,1563.0,1842.0,1406.714286,1411.428571,1319.0,1277.857143,1445.571429,1616.0,...,15.5,23.5,21.529412,5ea281ba-f7c8-43a7-a17f-43ac77ed7f68,Staphylococcus aureus,0.0,0.0,,0.0,0.0


#2-Training model with only external data 

In [5]:
s_aureus_oxacillin_driams_b = s_aureus_driams_b.drop(columns=['code','species', 'Clindamycin', 'Ceftriaxone', 'Ciprofloxacin', 'Fusidic acid']) 
s_aureus_oxacillin_driams_b.dropna(axis=0, how="any", inplace=True)

In [6]:
X = s_aureus_oxacillin_driams_b.iloc[:, 0:6000].values  # variables independientes (espectros de masa)
y = s_aureus_oxacillin_driams_b.iloc[:, 6000].values    # variable dependientes (resistencia a ciprofloxacin)
X = np.asarray(X).astype(np.float32)
y = np.asarray(y).astype(np.float32)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0, stratify=y)

In [8]:
scaler=Normalizer(norm='max')
sc_X = scaler
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)

In [9]:
sample_size = X_train.shape[0] # numero de muestras en el set de datos
time_steps  = X_train.shape[1] # numero de atributos en el set de datos
input_dimension = 1            #

X_train_reshaped = X_train.reshape(sample_size,time_steps,input_dimension)
X_test_reshaped = X_test.reshape(X_test.shape[0],X_test.shape[1],1)

In [10]:
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, min_lr=0.000001)
early_st = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)

n_timesteps = X_train_reshaped.shape[1] #
n_features  = X_train_reshaped.shape[2] #

## create and fit DeepAMR model

In [11]:
model = Sequential(name="Modelo_s_aureus_ciprofloxacin")
init_mode = 'normal'
model.add(Conv1D(filters=(64), kernel_size=(17), input_shape = (n_timesteps,n_features), name='Conv_1'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling1D(pool_size=2, name="MaxPooling1D_1"))

model.add(Conv1D(filters=(128), kernel_size=(9),kernel_initializer=init_mode, kernel_regularizer=regularizers.l2(0.0001),  name='Conv_2'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling1D(pool_size=2, name="MaxPooling1D_2"))

model.add(Conv1D(filters=(256), kernel_size=(5),kernel_initializer=init_mode,kernel_regularizer=regularizers.l2(0.0001),   name='Conv_3'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling1D(pool_size=2, name="MaxPooling1D_3"))

model.add(Conv1D(filters=(256), kernel_size=(5),kernel_initializer=init_mode, kernel_regularizer=regularizers.l2(0.0001),   name='Conv_4'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling1D(pool_size=2, name="MaxPooling1D_4"))

model.add(Flatten())
model.add(Dropout(0.65))
model.add(Dense(256, activation='relu',kernel_initializer=init_mode, kernel_regularizer=regularizers.l2(0.0001), name="fully_connected_0"))
model.add(Dense(64, activation='relu',kernel_initializer=init_mode, kernel_regularizer=regularizers.l2(0.0001), name="fully_connected_1"))
model.add(Dense(64, activation='relu',kernel_initializer=init_mode, kernel_regularizer=regularizers.l2(0.0001),  name="fully_connected_2"))
model.add(Dense(n_features, activation='sigmoid', name="OUT_Layer"))

model.compile(optimizer = Adam(learning_rate=0.0001), loss = 'binary_crossentropy',  metrics=METRICS)
model.summary()

Model: "Modelo_s_aureus_ciprofloxacin"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Conv_1 (Conv1D)             (None, 5984, 64)          1152      
                                                                 
 batch_normalization (BatchN  (None, 5984, 64)         256       
 ormalization)                                                   
                                                                 
 activation (Activation)     (None, 5984, 64)          0         
                                                                 
 MaxPooling1D_1 (MaxPooling1  (None, 2992, 64)         0         
 D)                                                              
                                                                 
 Conv_2 (Conv1D)             (None, 2984, 128)         73856     
                                                                 
 batch_normalization_1 (Batc  (None, 

In [12]:
history = model.fit(X_train_reshaped, y_train, epochs=100, batch_size=10, verbose=1, validation_split=0.1, callbacks=[reduce_lr,early_st])


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100


In [13]:
y_pred  = model.predict(X_test_reshaped)
y_pred = (y_pred>0.5)
cm = confusion_matrix(y_test, y_pred)
print(cm)

[[66  0]
 [ 3  1]]


In [20]:
target_names=["0","1"]
print(classification_report(y_test, y_pred, target_names=target_names))

train_predictions_baseline = model.predict(X_train_reshaped, batch_size=10)
test_predictions_baseline = model.predict(X_test_reshaped, batch_size=10)

baseline_results = model.evaluate(X_test_reshaped, y_test, verbose=0)
for name, value in zip(model.metrics_names, baseline_results):
  print(name, ': ', value)
print()

              precision    recall  f1-score   support

           0       0.95      0.79      0.86        66
           1       0.07      0.25      0.11         4

    accuracy                           0.76        70
   macro avg       0.51      0.52      0.48        70
weighted avg       0.90      0.76      0.82        70

loss :  4.999644756317139
tp :  1.0
fp :  0.0
tn :  66.0
fn :  3.0
accuracy :  0.9571428298950195
precision :  1.0
recall :  0.25
auc :  0.6136363744735718
prc :  0.31139081716537476



#3-Test external data on DRIAMS-A pretrained model 

In [21]:
y_pred  = model_from_driams_a.predict(X_test_reshaped)
y_pred = (y_pred>0.5)
cm = confusion_matrix(y_test, y_pred)
print(cm)

[[52 14]
 [ 3  1]]


In [22]:
target_names=["0","1"]
print(classification_report(y_test, y_pred, target_names=target_names))

train_predictions_baseline = model_from_driams_a.predict(X_train_reshaped, batch_size=10)
test_predictions_baseline = model_from_driams_a.predict(X_test_reshaped, batch_size=10)

baseline_results = model_from_driams_a.evaluate(X_test_reshaped, y_test, verbose=0)
for name, value in zip(model_from_driams_a.metrics_names, baseline_results):
  print(name, ': ', value)
print()

              precision    recall  f1-score   support

           0       0.95      0.79      0.86        66
           1       0.07      0.25      0.11         4

    accuracy                           0.76        70
   macro avg       0.51      0.52      0.48        70
weighted avg       0.90      0.76      0.82        70

loss :  1.2195426225662231
tp :  1.0
fp :  14.0
tn :  52.0
fn :  3.0
accuracy :  0.7571428418159485
precision :  0.06666667014360428
recall :  0.25
auc :  0.6571968793869019
prc :  0.1281694918870926



#4-Test external data applying transfer learning, freezing convolutional layers.

In [32]:
model1 = Sequential()
for layer  in model_from_driams_a.layers[:-4]:
  model1.add(layer)
model1.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Conv_1 (Conv1D)             (None, 5984, 64)          1152      
                                                                 
 batch_normalization_8 (Batc  (None, 5984, 64)         256       
 hNormalization)                                                 
                                                                 
 activation_8 (Activation)   (None, 5984, 64)          0         
                                                                 
 MaxPooling1D (MaxPooling1D)  (None, 2992, 64)         0         
                                                                 
 Conv_2 (Conv1D)             (None, 2984, 128)         73856     
                                                                 
 batch_normalization_9 (Batc  (None, 2984, 128)        512       
 hNormalization)                                      

In [33]:
init_mode = 'normal'
for layer in model1.layers:
  layer.trainable = False
model1.add(Dense(256, activation='relu',kernel_initializer=init_mode, kernel_regularizer=regularizers.l2(0.0001), name="fully_connected_0"))
model1.add(Dense(64, activation='relu',kernel_initializer=init_mode, kernel_regularizer=regularizers.l2(0.0001), name="fully_connected_1"))
model1.add(Dense(64, activation='relu',kernel_initializer=init_mode, kernel_regularizer=regularizers.l2(0.0001),  name="fully_connected_2"))
model1.add(Dense(1, activation='sigmoid', name="OUT_Layer"))

In [34]:
model1.compile(optimizer = Adam(learning_rate=0.0000001), loss = 'binary_crossentropy',  metrics=METRICS)

In [35]:
model1.fit(X_train_reshaped, y_train, epochs=50,batch_size=10, verbose=1, validation_split=0.1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7fee662b9d60>

In [36]:
y_pred  = model1.predict(X_test_reshaped)
y_pred = (y_pred>0.5)

print("MCC e_coli ciprofloxacin E_T driams B:",matthews_corrcoef(y_test, y_pred))

MCC e_coli ciprofloxacin E_T driams B: 0.0


In [37]:
Ex_test_D_B = model1.evaluate(X_test_reshaped, y_test, verbose=0)
for name, value in zip(model1.metrics_names, Ex_test_D_B):
  print(name, ': ', value)
print()

loss :  6.498785972595215
tp :  0.0
fp :  0.0
tn :  66.0
fn :  4.0
accuracy :  0.9428571462631226
precision :  0.0
recall :  0.0
auc :  0.469696968793869
prc :  0.059333622455596924



#5-Test external data applying transfer learning, retraining all layers.

In [38]:
model_from_driams_a.compile(optimizer = Adam(learning_rate=0.0000001), loss = 'binary_crossentropy',  metrics=METRICS)


In [39]:
model_from_driams_a.fit(X_train_reshaped, y_train, epochs=50,batch_size=10, verbose=1, validation_split=0.1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7fee0802bd00>

In [40]:
y_pred  = model_from_driams_a.predict(X_test)
y_pred = (y_pred>0.5)

print("MCC e_coli ciprofloxacin E_T driams B:",matthews_corrcoef(y_test, y_pred))

MCC e_coli ciprofloxacin E_T driams B: 0.08930808589059785


In [41]:
Ex_test_D_B = model_from_driams_a.evaluate(X_test_reshaped, y_test,verbose=0)
for name, value in zip(model_from_driams_a.metrics_names, Ex_test_D_B):
  print(name, ': ', value)
print()

loss :  1.046297550201416
tp :  1.0
fp :  8.0
tn :  58.0
fn :  3.0
accuracy :  0.8428571224212646
precision :  0.1111111119389534
recall :  0.25
auc :  0.6571969389915466
prc :  0.11039191484451294

