In [17]:
import logging
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn import metrics
from sklearn import preprocessing
from sklearn.metrics import roc_auc_score,matthews_corrcoef
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, roc_curve,auc
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score,StratifiedKFold
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping


In [18]:
# Seed every random generator this notebook draws from. Seeding NumPy alone is
# not enough for reproducibility: Keras weight initialisation and dropout use
# TensorFlow's generator, so that one is seeded too.
random_seed = 42
np.random.seed(random_seed)
tf.random.set_seed(random_seed)

# NOTE(review): no LightGBM model is used in the visible cells; this logger
# tweak appears to be a leftover from another notebook. Kept because it is harmless.
logging.getLogger('lightgbm').setLevel(logging.ERROR)

# Load the training table and the held-out test table
df_train = pd.read_csv('mlo+deep+mixup+stem.csv')
df_test = pd.read_csv('mlo_only_test_dataset_10.csv')

# Separate the training features from the 'diagnosis' target column
X_train = df_train.drop(columns='diagnosis')
y_train = df_train['diagnosis']

# Fit the scaler on the training features only, so that no test-set statistics
# influence the preprocessing; the same fitted scaler is reused for the test set.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)

# The test file carries extra bookkeeping columns that must be removed before
# scaling (presumably they are absent from the training table -- TODO confirm
# that the remaining test columns match the training features in name and order).
metadata_columns = ['volume', 'case', 'image_name', 'side', 'view', 'segment']
df_test = df_test.drop(columns=metadata_columns)
X_test = df_test.drop(columns='diagnosis')
y_test = df_test['diagnosis']

X_test = scaler.transform(X_test)

#adding validation step
#X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=42)


In [19]:
# Total number of elements (rows x columns) in df_test as it stands at this
# point, i.e. after the bookkeeping columns were dropped in the loading cell.
df_test.size

416150

In [20]:
# Number of independent train/evaluate repetitions. It drives both the loop and
# the 'Run' column of the results table so the two cannot drift apart.
n_runs = 30

# Decision threshold applied to the sigmoid output when hard labels are needed
threshold_value = 0.5

# Per-run metrics, one entry appended per repetition
fpr_list, tpr_list, mcc_list, auc_list = [], [], [], []

# Ground-truth mask, computed once because it does not change between runs.
# Label 1 is treated as the positive class, matching scikit-learn's default
# pos_label for the scores below.
is_positive = np.asarray(y_test).ravel() == 1

for run in range(n_runs):
    # Build a fresh, small fully connected network for every repetition so that
    # no weights carry over from the previous run.
    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=X_train.shape[1]))
    model.add(Dropout(0.5))
    model.add(Dense(32, activation='relu', kernel_regularizer='l2'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    # Compile the model
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # NOTE(review): with epochs=5 and patience=5 this callback can never stop
    # training early, and it monitors the *training* loss. Whether the best
    # weights are restored at the end of an uninterrupted run depends on the
    # Keras version -- confirm this is the intended behaviour.
    early_stopping = EarlyStopping(monitor='loss', patience=5, restore_best_weights=True)

    # Train on the full training set (no validation split is used).
    # If validation is added later, carve it out of the training data rather
    # than passing the test set as validation_data, which would leak test
    # information into model selection.
    model.fit(X_train, y_train, epochs=5, batch_size=64, callbacks=[early_stopping])

    # Predicted probabilities come back with shape (n, 1); flatten to 1-D so
    # every metric below receives the layout it expects.
    y_pred_proba = model.predict(X_test).ravel()

    # Single source of truth for the hard 0/1 predictions
    y_pred_binary = (y_pred_proba > threshold_value).astype(int)

    # Calculate Matthews correlation coefficient (MCC)
    mcc_value = matthews_corrcoef(y_test, y_pred_binary)

    # Threshold-dependent scores. These are not written to the results table;
    # they hold the values of the most recent run for interactive inspection.
    precision = precision_score(y_test, y_pred_binary)
    recall = recall_score(y_test, y_pred_binary)
    f1 = f1_score(y_test, y_pred_binary)

    # Threshold-free ranking quality, computed from the probabilities
    auc1 = roc_auc_score(y_test, y_pred_proba)

    # FPR / TPR at the decision threshold, computed directly from the hard
    # predictions: the mean of the 0/1 predictions over the true negatives is
    # the false-positive rate, and over the true positives the true-positive
    # rate. This avoids building a ROC curve from already-thresholded labels
    # and then searching for the "closest" threshold, which hinges on argmin
    # tie-breaking and returns the wrong point when the model predicts a single
    # class. The result is NaN if the corresponding class is absent from y_test.
    fpr_at_threshold = y_pred_binary[~is_positive].mean()
    tpr_at_threshold = y_pred_binary[is_positive].mean()

    print(f"At threshold {threshold_value:.2f}: FPR = {fpr_at_threshold:.4f}, TPR = {tpr_at_threshold:.4f}")

    # Append metrics to lists
    fpr_list.append(fpr_at_threshold)
    tpr_list.append(tpr_at_threshold)
    mcc_list.append(mcc_value)
    auc_list.append(auc1)

# Create a DataFrame to store the metrics, one row per run
results_df = pd.DataFrame({
    'Run': np.arange(1, n_runs + 1),
    'FPR': fpr_list,
    'TPR': tpr_list,
    'MCC': mcc_list,
    'AUC': auc_list
})


# Save the DataFrame to a CSV file
results_df.to_csv('mlo+deep+mixup+stem_MLP.csv', index=False)






Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
At threshold 0.50: FPR = 0.0911, TPR = 0.6364
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
At threshold 0.50: FPR = 0.0938, TPR = 0.5909
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
At threshold 0.50: FPR = 0.0964, TPR = 0.6364
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
At threshold 0.50: FPR = 0.0911, TPR = 0.7727
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
At threshold 0.50: FPR = 0.0911, TPR = 0.7273
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
At threshold 0.50: FPR = 0.0833, TPR = 0.6364
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
At threshold 0.50: FPR = 0.0651, TPR = 0.6818
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
At threshold 0.50: FPR = 0.0807, TPR = 0.6818
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
At threshold 0.50: FPR = 0.0729, TPR = 0.6364
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
At threshold 0.50: FPR = 0.0885, TPR = 0.6364
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
