In [1]:
import pandas as pd
from sklearn.metrics import roc_auc_score,matthews_corrcoef
from sklearn.model_selection import train_test_split
import time
import logging
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import cross_val_score,StratifiedKFold
from sklearn import metrics
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, roc_curve,auc
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
import tensorflow as tf



In [2]:
# Seed every random number generator the notebook relies on. Seeding NumPy
# alone is not enough for reproducibility: the Keras layers used later
# (weight initialisation, dropout) draw from TensorFlow's generator.
random_seed = 42
np.random.seed(random_seed)
tf.random.set_seed(random_seed)

# NOTE(review): nothing in the visible cells uses LightGBM, so this logger
# tweak looks vestigial. It is kept because it is harmless.
logging.getLogger('lightgbm').setLevel(logging.ERROR)

# Load the training and test tables.
df_train = pd.read_csv('mixup_mlo_only_train_dataset_80.csv')
df_test = pd.read_csv('mlo_only_test_dataset_10.csv')

# Training features and labels.
# NOTE(review): the training CSV is not given the column drop applied to the
# test CSV below; this assumes it already holds only feature columns plus
# 'diagnosis' -- confirm.
X_train = df_train.drop(columns='diagnosis')
y_train = df_train['diagnosis']

# Fit the scaler on the training features only, then reuse the same fitted
# scaler for the test features so no test statistics leak into training.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)

# Test features and labels. Columns are dropped by name (the original passed
# a DataFrame slice as the labels argument, which only worked because
# iterating a DataFrame yields its column names).
df_test = df_test.drop(columns=['volume', 'case', 'image_name', 'side', 'view', 'segment'])
X_test = df_test.drop(columns='diagnosis')
y_test = df_test['diagnosis']

X_test = scaler.transform(X_test)

# Conv1D expects (samples, steps, channels): add a trailing channel axis of 1.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

In [3]:
# Repeated-run evaluation: train a fresh 1D-CNN N_RUNS times on the training
# set, score each fitted model on the test set, and collect FPR / TPR / MCC /
# AUC per run into a table that is written to CSV.
N_RUNS = 30
N_EPOCHS = 5
BATCH_SIZE = 64
LEARNING_RATE = 0.0001
THRESHOLD = 0.5


def build_cnn(n_features):
    """Build and compile the binary-classification 1D-CNN.

    Parameters
    ----------
    n_features : int
        Length of the feature axis; the network input shape is (n_features, 1).

    Returns
    -------
    tensorflow.keras.Model
        Model compiled with Adam and binary cross-entropy, tracking accuracy.
    """
    inputs = Input(shape=(n_features, 1))
    x = Conv1D(filters=128, kernel_size=3, activation='relu')(inputs)
    # Feed the pooled tensor forward. Previously the pooling layer was created
    # but Flatten was wired to the convolution output, so pooling was dead code.
    x = MaxPooling1D(pool_size=2)(x)
    x = Flatten()(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dropout(0.2)(x)
    x = BatchNormalization()(x)
    outputs = Dense(1, activation='sigmoid')(x)

    cnn = Model(inputs=inputs, outputs=outputs)
    cnn.compile(
        loss='binary_crossentropy',
        optimizer=Adam(learning_rate=LEARNING_RATE),
        metrics=['accuracy'],
    )
    return cnn


def fpr_tpr_at_threshold(y_true, y_score, threshold=0.5):
    """Return (FPR, TPR) for the rule ``y_score > threshold``.

    The rates come straight from the confusion counts instead of from a
    nearest-threshold lookup on a ROC curve, so they stay correct when every
    prediction falls on one side of the threshold. Label 1 is treated as the
    positive class. A rate whose denominator is zero (no negatives or no
    positives in ``y_true``) is reported as NaN.
    """
    positives = np.asarray(y_true).ravel() == 1
    flagged = np.asarray(y_score).ravel() > threshold

    n_pos = int(positives.sum())
    n_neg = int((~positives).sum())
    false_pos = int((flagged & ~positives).sum())
    true_pos = int((flagged & positives).sum())

    fpr_value = false_pos / n_neg if n_neg else float('nan')
    tpr_value = true_pos / n_pos if n_pos else float('nan')
    return fpr_value, tpr_value


run_records = []

for run in range(N_RUNS):
    # Drop the Keras state left by the previous run's model so memory does
    # not grow across the loop, and give each run its own reproducible seed.
    tf.keras.backend.clear_session()
    tf.random.set_seed(random_seed + run)

    model = build_cnn(X_train.shape[1])

    # Early stopping watches the training loss (no validation split is used).
    # NOTE(review): with patience equal to the epoch budget the callback has
    # no room to stop early; raise N_EPOCHS or lower patience if it should.
    early_stopping = EarlyStopping(monitor='loss', patience=5, restore_best_weights=True)

    # Train model without validation
    model.fit(X_train, y_train, epochs=N_EPOCHS, batch_size=BATCH_SIZE, callbacks=[early_stopping])

    # Predicted probabilities on the test set, flattened from (n, 1) to (n,).
    y_pred_proba = model.predict(X_test).ravel()
    y_pred_binary = (y_pred_proba > THRESHOLD).astype(int)

    # Threshold-dependent metrics use the hard labels; AUC uses the scores.
    mcc_value = matthews_corrcoef(y_test, y_pred_binary)
    auc1 = roc_auc_score(y_test, y_pred_proba)
    fpr_at_threshold, tpr_at_threshold = fpr_tpr_at_threshold(y_test, y_pred_proba, THRESHOLD)

    print(f"At threshold {THRESHOLD:.2f}: FPR = {fpr_at_threshold:.4f}, TPR = {tpr_at_threshold:.4f}")

    run_records.append({
        'Run': run + 1,
        'FPR': fpr_at_threshold,
        'TPR': tpr_at_threshold,
        'MCC': mcc_value,
        'AUC': auc1,
    })

# One row per run; the column order matches the CSV layout.
results_df = pd.DataFrame(run_records, columns=['Run', 'FPR', 'TPR', 'MCC', 'AUC'])

print(results_df)

# Save the DataFrame to a CSV file
results_df.to_csv('mixup_mlo.csv', index=False)

Epoch 1/30
13/63 [=====>........................] - ETA: 1:22 - loss: 0.4361 - accuracy: 0.8209

KeyboardInterrupt: 