In [2]:
!pip install vit_keras -q

In [3]:
!pip install tensorflow-addons


Collecting tensorflow-addons
  Downloading tensorflow_addons-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (611 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m611.8/611.8 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
Collecting typeguard<3.0.0,>=2.7 (from tensorflow-addons)
  Downloading typeguard-2.13.3-py3-none-any.whl (17 kB)
Installing collected packages: typeguard, tensorflow-addons
Successfully installed tensorflow-addons-0.23.0 typeguard-2.13.3


In [4]:
import os
import cv2
import sys
import random
import warnings
import numpy as np
import pandas as pd
from time import time
from itertools import chain
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from skimage.transform import resize
from skimage.morphology import label
from skimage.io import imread, imshow, imread_collection, concatenate_images

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.optimizers import RMSprop, Adam
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.layers import (
    Dense, Input, Dropout, Lambda, Conv2D, Conv2DTranspose, MaxPooling2D, Concatenate,
    Activation, Add, multiply, add, concatenate, LeakyReLU, ZeroPadding2D, UpSampling2D,
    BatchNormalization, SeparableConv2D, Flatten )

from sklearn.metrics import classification_report
%matplotlib inline

from vit_keras import  vit, utils


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



In [5]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [6]:
PATH = "gdrive/My Drive/Colab Notebooks/AML Project/chest_xray_reshuffle/"

In [7]:
#reading required files
train_dir = os.path.join(PATH, 'train')
validation_dir = os.path.join(PATH, 'val')
test_dir = os.path.join(PATH, 'test')

In [8]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Image dimensions expected by MobileNetV2
IMG_HEIGHT = 224
IMG_WIDTH = 224
BATCH_SIZE = 32

# Create ImageDataGenerators
train_datagen = ImageDataGenerator(rescale=1./255)
validation_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

In [9]:
# Flow training images in batches using tf.keras.preprocessing.image.ImageDataGenerator
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='binary'  # or 'categorical' if you have more than two classes
)

# Flow validation images in batches using tf.keras.preprocessing.image.ImageDataGenerator
validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='binary'
)

# Flow test images in batches using tf.keras.preprocessing.image.ImageDataGenerator
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='binary'
)

Found 4686 images belonging to 2 classes.
Found 585 images belonging to 2 classes.
Found 585 images belonging to 2 classes.


In [10]:
!pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.6-py3-none-any.whl (128 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/128.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m122.9/128.9 kB[0m [31m4.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m128.9/128.9 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.6 kt-legacy-1.0.5


In [11]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras import layers, models
from tensorflow.keras.metrics import Precision, Recall, AUC, BinaryAccuracy
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, CSVLogger, Callback
from tensorflow.keras import backend as K
from keras_tuner import RandomSearch

In [12]:
# Define metrics
precision_metric = Precision()
recall_metric = Recall()
auc_metric = AUC()
auc_pr_metric = AUC(curve='PR')

def F1Score(y_true, y_pred):
    precision = precision_metric(y_true, y_pred)
    recall = recall_metric(y_true, y_pred)
    f1_score = 2 * ((precision * recall) / (precision + recall + tf.keras.backend.epsilon()))
    return f1_score

# Callbacks Setup
checkpoint = ModelCheckpoint('model-best-new-data.h5', save_best_only=True, monitor='val_accuracy', mode='max')
early_stopping = EarlyStopping(monitor='val_loss', patience=10)
csv_logger = CSVLogger('training_log-new-data.csv')

class ResetStatesCallback(Callback):
    def on_epoch_begin(self, epoch, logs=None):
        precision_metric.reset_states()
        recall_metric.reset_states()
        auc_metric.reset_states()
        auc_pr_metric.reset_states()



In [13]:
# Include the ResetStatesCallback in your callbacks list
reset_states_callback = ResetStatesCallback()

In [14]:


# Model Setup with ViT
def build_model(hp):
    model = vit.vit_b16(
        image_size=224,
        activation='sigmoid',  # Change activation based on your task
        pretrained=True,
        include_top=False,
        pretrained_top=False,
        classes=1  # Assuming binary classification, adjust if needed
    )

    # Add Dense layers
    model = tf.keras.Sequential([
        model,
        layers.Dense(units=hp.Int('units', min_value=32, max_value=512, step=32), activation='relu'),
        layers.Dropout(rate=hp.Float('dropout', min_value=0.0, max_value=0.5, step=0.1)),
        layers.Dense(1, activation='sigmoid')
    ])

    # Compile the model
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
        loss='binary_crossentropy',
        metrics=[
            'accuracy',
            precision_metric,
            recall_metric,
            F1Score,
            auc_metric,
            auc_pr_metric
        ]
    )

    return model


In [15]:
# Define the tuner
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=10,
    executions_per_trial=1,
    directory='my_dir',
    project_name='pneumonia_detection-new-data'
)

Downloading data from https://github.com/faustomorales/vit-keras/releases/download/dl/ViT-B_16_imagenet21k+imagenet2012.npz




In [None]:
# Run the hyperparameter search
tuner.search(train_generator, epochs=3, validation_data=validation_generator, callbacks=[checkpoint, early_stopping, csv_logger, reset_states_callback])



Search: Running Trial #1

Value             |Best Value So Far |Hyperparameter
96                |96                |units
0                 |0                 |dropout
0.0018358         |0.0018358         |learning_rate

Epoch 1/3
 15/147 [==>...........................] - ETA: 2:35:51 - loss: 0.8000 - accuracy: 0.6979 - precision: 0.7459 - recall: 0.8955 - F1Score: 0.7195 - auc: 0.5066 - auc_1: 0.7300

In [19]:
# Get the best hyperparameters
best_hparams = tuner.oracle.get_best_trials(num_trials=1)[0].hyperparameters.values
print("Best Hyperparameters:")
print(best_hparams)

Best Hyperparameters:
{'units': 160, 'dropout': 0.2, 'learning_rate': 0.00017122563680770553}


In [20]:
# Get the best hyperparameters
best_trial = tuner.oracle.get_best_trials(num_trials=1)[0]
best_hparams = best_trial.hyperparameters.values
print("Best Hyperparameters:")
print(best_hparams)

Best Hyperparameters:
{'units': 160, 'dropout': 0.2, 'learning_rate': 0.00017122563680770553}


In [21]:
# Build the final model using the best hyperparameters
final_model_test = tuner.hypermodel.build(best_trial.hyperparameters)
final_model_test.compile(optimizer=tf.keras.optimizers.Adam(), loss='binary_crossentropy', metrics=['accuracy', precision_metric,
            recall_metric,
            F1Score,
            auc_metric,
            auc_pr_metric])

In [24]:
# Train the final model
final_history_test = final_model_test.fit(
    train_generator,
    epochs=8,  # Adjust as needed
    validation_data=validation_generator,
    verbose=1
)

Epoch 1/8


ResourceExhaustedError: ignored

In [25]:
# Evaluate the model on the test data
test_loss_full, test_accuracy_full, test_precision_full, test_recall_full, test_f1score_full, test_auc_full, test_auc_pr_full = final_model_test.evaluate(test_generator)

print(f"Test Loss: {test_loss_full}")
print(f"Test Accuracy: {test_accuracy_full}")
print(f"Test Precision: {test_precision_full}")
print(f"Test Recall: {test_recall_full}")
print(f"Test F1 Score: {test_f1score_full}")
print(f"Test AUC: {test_auc_full}")
print(f"Test AUC-PR: {test_auc_pr_full}")

Test Loss: 1.5691089630126953
Test Accuracy: 0.27008548378944397
Test Precision: 0.0
Test Recall: 0.0
Test F1 Score: 0.0
Test AUC: 0.5512628555297852
Test AUC-PR: 0.7547416090965271


In [26]:
import matplotlib.pyplot as plt

# Assuming you have the 'history' object from model.fit()
epochs = range(1, len(final_history_test.history['loss']) + 1)

plt.figure(figsize=(18, 10))

# Plotting Training and Validation Loss
plt.subplot(2, 2, 1)
plt.plot(epochs, final_history_test.history['loss'], label='Training Loss')
plt.plot(epochs, final_history_test.history['val_loss'], label='Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.xticks(epochs)
plt.legend()

# Plotting Training and Validation Accuracy
plt.subplot(2, 2, 2)
plt.plot(epochs, final_history_test.history['accuracy'], label='Training Accuracy')
plt.plot(epochs, final_history_test.history['val_accuracy'], label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.xticks(epochs)
plt.legend()

# Plotting Training and Validation Precision
plt.subplot(2, 2, 3)
plt.plot(epochs, final_history_test.history['precision'], label='Training Precision')
plt.plot(epochs, final_history_test.history['val_precision'], label='Validation Precision')
plt.title('Training and Validation Precision')
plt.xlabel('Epoch')
plt.ylabel('Precision')
plt.xticks(epochs)
plt.legend()

# Plotting Training and Validation Recall
plt.subplot(2, 2, 4)
plt.plot(epochs, final_history_test.history['recall'], label='Training Recall')
plt.plot(epochs, final_history_test.history['val_recall'], label='Validation Recall')
plt.title('Training and Validation Recall')
plt.xlabel('Epoch')
plt.ylabel('Recall')
plt.xticks(epochs)
plt.legend()

# Add similar sections for other metrics like F1 Score, AUC, etc., replacing 'METRIC_NAME' with actual metric names

plt.tight_layout()
plt.show()


NameError: ignored