In [1]:
import os 
import pathlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications.densenet import preprocess_input as densenet_preprocess_input

In [2]:
# Load and preprocess the data
def load_and_preprocess_data(data):
    images = []
    labels = []
    for label, image_paths in data.items():
        for path in image_paths:
            img = cv2.imread(str(path))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert image to RGB format
            img = cv2.resize(img, (224, 224))  # Resize image to VGG16 input size
            images.append(img)
            labels.append(label)
    images = np.array(images)
    labels = np.array(labels)
    return images, labels

In [3]:
# Load data
path = pathlib.Path(r"C:\Users\gauth\Desktop\jobs\EHR\ML model\kvasir-dataset")

In [4]:
dyed_ifted_polyps = list(path.glob('dyed-lifted-polyps/*'))
dyed_resection_margins = list(path.glob('dyed-resection-margins/*'))
esophagitis = list(path.glob('esophagitis/*'))
normal_cecum = list(path.glob('normal-cecum/*'))
normal_pylorus = list(path.glob('normal-pylorus/*'))
normal_z_line = list(path.glob('normal-z-line/*'))
polyps = list(path.glob('polyps/*'))
ulcerative_colitis = list(path.glob('ulcerative-colitis/*'))

In [5]:
data = {
    'dyed-lifted-polyps' : dyed_ifted_polyps ,
    'dyed-resection-margins' : dyed_resection_margins,
    'esophagitis' : esophagitis,
    'normal-cecum' : normal_cecum,
    'normal-pylorus' : normal_pylorus,
    'normal-z-line' : normal_z_line,
    'polyps' : polyps,
    'ulcerative-colitis' : ulcerative_colitis
}

polys_labels = {
    0:'dyed-lifted-polyps',
    1:'dyed-resection-margins',
    2:'esophagitis',
    3:'normal-cecum',
    4:'normal-pylorus',
    5:'normal-z-line',
    6:'polyps',
    7:'ulcerative-colitis'
}

In [6]:
images, labels = load_and_preprocess_data(data)

In [7]:
# Split the data into training, validation, and testing sets
train_data, test_data, train_labels, test_labels = train_test_split(images, labels, test_size=0.2, random_state=42)
train_data, val_data, train_labels, val_labels = train_test_split(train_data, train_labels, test_size=0.2, random_state=42)

In [8]:
# Convert string labels to numerical labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)
val_labels_encoded = label_encoder.transform(val_labels)
test_labels_encoded = label_encoder.transform(test_labels)

In [9]:
# Load DenseNet121 model with pre-trained weights
base_model = DenseNet121(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the base model layers
base_model.trainable = False

# Build your classification model on top of the pre-trained DenseNet model
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(8, activation='softmax')  # Adjust the output units based on the number of classes
])

In [10]:
# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Preprocess the data using DenseNet's preprocess_input function
train_data_densenet = densenet_preprocess_input(train_data)
val_data_densenet = densenet_preprocess_input(val_data)
test_data_densenet = densenet_preprocess_input(test_data)

In [11]:
# Train the model
history = model.fit(train_data_densenet, train_labels_encoded, epochs=10, batch_size=32, validation_data=(val_data_densenet, val_labels_encoded))

Epoch 1/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 693ms/step - accuracy: 0.4581 - loss: 1.5342 - val_accuracy: 0.8000 - val_loss: 0.5121
Epoch 2/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 673ms/step - accuracy: 0.7536 - loss: 0.5954 - val_accuracy: 0.8156 - val_loss: 0.4403
Epoch 3/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 765ms/step - accuracy: 0.8294 - loss: 0.4425 - val_accuracy: 0.8594 - val_loss: 0.3671
Epoch 4/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 758ms/step - accuracy: 0.8353 - loss: 0.3945 - val_accuracy: 0.8438 - val_loss: 0.3745
Epoch 5/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 761ms/step - accuracy: 0.8736 - loss: 0.3310 - val_accuracy: 0.8453 - val_loss: 0.3711
Epoch 6/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 756ms/step - accuracy: 0.8819 - loss: 0.3092 - val_accuracy: 0.8578 - val_loss: 0.3520
Epoch 7/10
[1m80/80[

In [12]:
# Evaluate the model
test_loss, test_acc = model.evaluate(test_data_densenet, test_labels_encoded)
print(f'Test Accuracy: {test_acc}')

[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 523ms/step - accuracy: 0.9069 - loss: 0.2758
Test Accuracy: 0.8974999785423279


In [13]:
# Model summary
model.summary()

In [19]:
model_architecture = model.to_json()

# Save the model's weights
model_weights = model.get_weights()

# Pickle the model architecture and weights
with open('densenet_model_architecture.pkl', 'wb') as architecture_file:
    pickle.dump(model_architecture, architecture_file)

with open('densenet_model_weights.pkl', 'wb') as weights_file:
    pickle.dump(model_weights, weights_file)

In [30]:
import tensorflow as tf
from tensorflow.keras.models import load_model

# Save the model using SavedModel format
tf.saved_model.save(model, 'dense')
print('Model Saved!')


INFO:tensorflow:Assets written to: dense\assets


INFO:tensorflow:Assets written to: dense\assets


Model Saved!


In [31]:
from tensorflow.keras.models import load_model

# Load the saved model
loaded_model = load_model('C:/Users/gauth/Desktop/jobs/EHR/ML model/dense')

# Summary of the loaded model
loaded_model.summary()

ValueError: File format not supported: filepath=C:/Users/gauth/Desktop/jobs/EHR/ML model/dense. Keras 3 only supports V3 `.keras` files and legacy H5 format files (`.h5` extension). Note that the legacy SavedModel format is not supported by `load_model()` in Keras 3. In order to reload a TensorFlow SavedModel as an inference-only layer in Keras 3, use `keras.layers.TFSMLayer(C:/Users/gauth/Desktop/jobs/EHR/ML model/dense, call_endpoint='serving_default')` (note that your `call_endpoint` might have a different name).

In [None]:
# ROC, AUC
from sklearn.metrics import roc_curve, auc, roc_auc_score
from sklearn.preprocessing import label_binarize

# Predict probabilities for each class
y_pred_proba = model.predict(test_data_densenet)

# Binarize the true labels
y_test_binarized = label_binarize(test_labels_encoded, classes=np.unique(test_labels_encoded))

# Calculate AUC for each class
roc_auc = roc_auc_score(y_test_binarized, y_pred_proba, multi_class='ovr')
print(f'ROC AUC Score: {roc_auc}')

# Plot ROC curve for each class
n_classes = y_test_binarized.shape[1]
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_test_binarized[:, i], y_pred_proba[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plotting
for i in range(n_classes):
    plt.plot(fpr[i], tpr[i], lw=2,
             label='ROC curve of class {0} (area = {1:0.2f})'.format(i, roc_auc[i]))

plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic for each class')
plt.legend(loc="lower right")
plt.show()


In [None]:
# Define model creation function
def create_model():
    base_model = DenseNet121(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    for layer in base_model.layers:
        layer.trainable = False
    
    model = Sequential([
        base_model,
        GlobalAveragePooling2D(),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(8, activation='softmax')  # Adjust the output units based on the number of classes
    ])
    
    model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    
    return model

In [None]:
# Define the number of classes
num_classes = 8  # Update with the number of classes in your dataset

# Perform cross-validation
num_folds = 5
skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=42)
cv_scores = []

for fold, (train_index, val_index) in enumerate(skf.split(train_data_densenet, train_labels_encoded)):
    print(f"Fold {fold + 1}/{num_folds}")
    
    # Create ResNet50 model
    model = create_model()
    
    # Compile the model
    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    
    # Train the model
    history = model.fit(train_data_densenet[train_index], train_labels_encoded[train_index], epochs=10, batch_size=32, validation_data=(train_data_densenet[val_index], train_labels_encoded[val_index]), verbose=1)
    
    # Evaluate the model on validation data
    val_loss, val_acc = model.evaluate(train_data_densenet[val_index], train_labels_encoded[val_index], verbose=0)
    print(f"Validation Accuracy: {val_acc}")
    cv_scores.append(val_acc)

# Calculate and print the mean cross-validation score
mean_cv_score = np.mean(cv_scores)
print(f"Mean Cross-Validation Accuracy: {mean_cv_score}")

In [None]:
# Generate confusion matrix
predictions = model.predict(test_data_densenet)
predicted_labels = np.argmax(predictions, axis=1)

cm = confusion_matrix(test_labels_encoded, predicted_labels)
classes = [str(i) for i in range(8)]  # Assuming you have 8 classes

# Plot confusion matrix
plt.figure(figsize=(8, 8))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=classes)
disp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.show()