<a href="https://colab.research.google.com/github/vallimeenaa9/Breast_Cancer/blob/main/Cancer_Image_Classification_on_BACH_Dataset_using_Ensemble_Learning_Research_Venture.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Joint research venture between V Valli Meenaa, Aadhi Aadhavan, and Dr. V. Jitendra Tembhurne (IIIT-N)**

### Loading Data and Data Pre-Processing

In [None]:
# Mount Google Drive at /content/drive; later cells read the dataset archive
# from it and write model checkpoints to it.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
pip install torchvision

In [None]:
!pip install -U efficientnet

In [None]:
import efficientnet.keras as efn

In [None]:
import zipfile
import os
# Unpack the dataset archive from Drive into /tmp.
# The context manager closes the archive even when extraction raises.
with zipfile.ZipFile('/content/drive/MyDrive/Cancer_PNG.zip', 'r') as zip_ref:
    zip_ref.extractall('/tmp')

In [None]:
import torch
import torchvision
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import cv2
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tqdm import tqdm
import os
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import EfficientNetB0
from keras.models import Sequential,Model
from keras.applications.vgg16 import preprocess_input,VGG16
from keras.layers import MaxPooling2D,Conv2D,Dense,BatchNormalization,Dropout,GlobalAveragePooling2D,Flatten,Input,GlobalMaxPooling2D
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, TensorBoard, ModelCheckpoint
from sklearn.metrics import classification_report,confusion_matrix
from keras.utils.vis_utils import plot_model
import ipywidgets as widgets
from sklearn.preprocessing import OneHotEncoder
import io
from PIL import Image
from IPython.display import display,clear_output
from warnings import filterwarnings
from glob import glob
from tifffile import imread, imwrite
from skimage.transform import resize

In [None]:
# Number of entries extracted under /tmp/Cancer_PNG/ (the next cell reads the
# Benign, InSitu, Invasive and Normal sub-folders from it).
len(os.listdir('/tmp/Cancer_PNG/'))

In [None]:
# Prepare data: collect the image file names of each class.
# os.listdir returns entries in arbitrary order, so the listings are sorted to
# give the same sample order (and therefore the same splits) on every run.
benign = sorted(os.listdir('/tmp/Cancer_PNG/Benign'))
insitu = sorted(os.listdir('/tmp/Cancer_PNG/InSitu'))
invasive = sorted(os.listdir('/tmp/Cancer_PNG/Invasive'))
normal = sorted(os.listdir('/tmp/Cancer_PNG/Normal'))

In [None]:
# Prepare input data
def _load_rgb_images(directory, filenames, size=(224, 224)):
    """Read, resize and convert to RGB every listed image of one class folder.

    Args:
        directory: Folder containing the image files.
        filenames: File names inside ``directory``, loaded in the given order.
        size: Target size handed to ``cv2.resize``.

    Returns:
        A list with one resized RGB array per entry of ``filenames``.

    Raises:
        ValueError: If OpenCV cannot decode a file. Unreadable files are not
            skipped, because the label vector is built from the length of each
            class listing and would otherwise fall out of step with the images.
    """
    images = []
    for name in filenames:
        path = os.path.join(directory, name)
        bgr = cv2.imread(path)
        if bgr is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError("Could not read image: " + path)
        resized = cv2.resize(bgr, size)
        # OpenCV loads images as BGR; the rest of the notebook works in RGB.
        images.append(cv2.cvtColor(resized, cv2.COLOR_BGR2RGB))
    return images


# Load the classes in the same order the labels are concatenated later:
# benign, in situ, invasive, normal.
X_data = []
for class_dir, class_files in (
    ('/tmp/Cancer_PNG/Benign', benign),
    ('/tmp/Cancer_PNG/InSitu', insitu),
    ('/tmp/Cancer_PNG/Invasive', invasive),
    ('/tmp/Cancer_PNG/Normal', normal),
):
    X_data.extend(_load_rgb_images(class_dir, class_files))

In [None]:
# Side lengths (in pixels) used for the ensemble's input tensor; the images
# are resized to a 224 x 224 square when they are loaded.
image_width = image_height = 224

In [None]:
# Combine the per-image arrays into a single batch array with a leading
# sample axis. np.stack always creates that axis, whereas np.squeeze silently
# drops every length-1 axis (including the batch axis if only one image was
# loaded) and hides shape mismatches between images.
X = np.stack(X_data)
X.shape

In [None]:
from matplotlib import pyplot as plt

# Visual sanity check: show the image at index 5 without smoothing.
fig, ax = plt.subplots()
ax.imshow(X[5], interpolation='nearest')
plt.show()

In [None]:
# One integer label per image, in the same class order in which the images
# were appended to X_data: benign=3, in situ=2, invasive=1, normal=0.
n_benign, n_insitu, n_invasive, n_normal = map(len, (benign, insitu, invasive, normal))
target_benign = np.full(n_benign, 3)
target_insitu = np.full(n_insitu, 2)
target_invasive = np.full(n_invasive, 1)
target_normal = np.full(n_normal, 0)
Y = np.concatenate(
    (target_benign, target_insitu, target_invasive, target_normal)
)
Y

In [None]:
# Total number of labels (one per listed image file across the four classes).
len(Y)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the images as a test set.
# - shuffle must be the boolean True: the string 'true' only behaved like True
#   because any non-empty string is truthy (so 'false' would have shuffled
#   too), and newer scikit-learn versions validate this parameter's type.
# - random_state pins the split so results are reproducible.
# - stratify=Y keeps the class proportions equal in both subsets.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, shuffle=True, random_state=42, stratify=Y
)
number_of_train = X_train.shape[0]
number_of_test = X_test.shape[0]
print('number_of_train:', number_of_train)
print('number_of_test:', number_of_test)

## ENSEMBLE LEARNING

### VGG16 CNN ARCHITECTURE

In [None]:
# Generic training callbacks, both watching the validation loss (the Keras
# default monitor) with a patience of 4 epochs.
# NOTE(review): neither callback is passed to the fit() calls visible in this
# part of the notebook — confirm whether they are meant to be used.
early_stop = EarlyStopping(monitor='val_loss', patience=4)
reduceLR = ReduceLROnPlateau(monitor='val_loss', patience=4)

In [None]:
# Detect and initialise the TPU, then build a distribution strategy for it.
# When no TPU is attached, fall back to TensorFlow's default strategy so the
# `with tpu_strategy.scope():` cells below still run on CPU/GPU.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)

    # tf.distribute.TPUStrategy replaces the deprecated
    # tf.distribute.experimental.TPUStrategy alias.
    tpu_strategy = tf.distribute.TPUStrategy(tpu)
except ValueError:
    # TPUClusterResolver raises ValueError when it cannot locate a TPU.
    tpu = None
    tpu_strategy = tf.distribute.get_strategy()

In [None]:
# Transfer-learning classifier: ImageNet-pretrained VGG16 convolutional base
# (frozen) followed by a small trainable head with 4 softmax outputs.
# NOTE(review): the images are fed as loaded, without VGG16's
# preprocess_input (imported above) — confirm that this is intended.
with tpu_strategy.scope():
    vgg_model = VGG16(include_top=False, weights='imagenet')

    # Freeze the pretrained layers so only the new head is updated.
    for frozen_layer in vgg_model.layers:
        frozen_layer.trainable = False

    # Head: global average pooling -> Dense(128, relu) -> Dropout -> softmax.
    x = GlobalAveragePooling2D()(vgg_model.output)
    x = Dropout(0.15)(Dense(128, activation='relu')(x))
    output = Dense(4, activation='softmax')(x)

    model2 = Model(inputs=vgg_model.input, outputs=output)
    model2.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )

In [None]:
# Re-order the training set before fitting: Keras' validation_split takes the
# last fraction of the arrays as validation data, so the sample order decides
# which images are validated on.
# A seeded NumPy permutation keeps that order reproducible and avoids
# rebinding the name `shuffle`, which the import cell takes from sklearn.utils.
SHUFFLE_SEED = 42
ind_list = np.random.default_rng(SHUFFLE_SEED).permutation(len(X_train))
X_train_new = X_train[ind_list]
Y_train_new = Y_train[ind_list]

In [None]:
# Write a VGG16 checkpoint to Drive each time validation accuracy reaches a
# new maximum; the epoch number and the score are embedded in the file name.
model_filepath = "/content/drive/My Drive/vgg-{epoch:02d}-{val_accuracy:.4f}.hdf5"
model_checkpoint_callbackvgg = ModelCheckpoint(
    model_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

In [None]:
# Train the VGG16-based model for 20 epochs, validating on the last 20% of
# the shuffled training arrays and checkpointing the best epochs.
r2 = model2.fit(
    x=X_train_new,
    y=Y_train_new,
    epochs=20,
    validation_split=0.2,
    callbacks=[model_checkpoint_callbackvgg],
)

### ResNet50 CNN MODEL

In [None]:
# Transfer-learning classifier: ImageNet-pretrained ResNet50 convolutional
# base (frozen) followed by the same small head used for the VGG16 model.
# NOTE(review): the images are fed as loaded, without ResNet50-specific
# input preprocessing — confirm that this is intended.
with tpu_strategy.scope():
    res = tf.keras.applications.ResNet50(include_top=False, weights='imagenet')

    # Freeze the pretrained layers so only the new head is updated.
    for frozen_layer in res.layers:
        frozen_layer.trainable = False

    # Head: global average pooling -> Dense(128, relu) -> Dropout -> softmax.
    x = GlobalAveragePooling2D()(res.output)
    x = Dropout(0.15)(Dense(128, activation='relu')(x))
    output = Dense(4, activation='softmax')(x)

    model3 = Model(inputs=res.input, outputs=output)
    model3.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )

In [None]:
# Re-order the training set before fitting: Keras' validation_split takes the
# last fraction of the arrays as validation data, so the sample order decides
# which images are validated on.
# A seeded NumPy permutation makes that order (and hence the train/validation
# partition) reproducible, and avoids rebinding the name `shuffle`, which the
# import cell takes from sklearn.utils.
SHUFFLE_SEED = 42
ind_list = np.random.default_rng(SHUFFLE_SEED).permutation(len(X_train))
X_train_new = X_train[ind_list]
Y_train_new = Y_train[ind_list]

In [None]:
# Write a ResNet50 checkpoint to Drive each time validation accuracy reaches
# a new maximum; the epoch number and the score are embedded in the file name.
model_filepath = "/content/drive/My Drive/res-{epoch:02d}-{val_accuracy:.4f}.hdf5"
model_checkpoint_callbackres = ModelCheckpoint(
    model_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

In [None]:
# Train the ResNet50-based model for 20 epochs, validating on the last 20% of
# the shuffled training arrays and checkpointing the best epochs.
# NOTE(review): this rebinds `r2`, which also holds the VGG16 training
# history — copy that history first if it is still needed.
r2 = model3.fit(
    x=X_train_new,
    y=Y_train_new,
    epochs=20,
    validation_split=0.2,
    callbacks=[model_checkpoint_callbackres],
)

### STACKED MODEL

In [None]:
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Average
# Build the ensemble: reload the two saved classifiers, feed them the same
# input tensor and average their softmax outputs.
# NOTE(review): the checkpoint file names embed the epoch and validation
# accuracy of one particular training run; update them after retraining.
model_1 = load_model("/content/drive/My Drive/vgg-13-0.7344.hdf5")
# Re-wrap each loaded model under a distinct name so the two sub-models can
# be told apart inside the ensemble.
model_1 = Model(inputs=model_1.inputs, outputs=model_1.outputs, name='vgg16')

model_2 = load_model("/content/drive/My Drive/res-20-0.8281.hdf5")
model_2 = Model(inputs=model_2.inputs, outputs=model_2.outputs, name='resnet50')

models = [model_1, model_2]

# Keras image tensors are laid out as (height, width, channels); the original
# passed width first, which only worked because both sides are equal.
model_input = Input(shape=(image_height, image_width, 3))
model_outputs = [member(model_input) for member in models]
ensemble_output = Average()(model_outputs)
ensemble_model = Model(inputs=model_input, outputs=ensemble_output, name='ensemble')

In [None]:
# Compile the ensemble with the same optimiser, loss and metric as its members.
ensemble_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Write an ensemble checkpoint to Drive each time validation accuracy reaches
# a new maximum; the epoch number and the score are embedded in the file name.
model_filepath = "/content/drive/My Drive/e1-{epoch:02d}-{val_accuracy:.4f}.hdf5"
model_checkpoint_callbacke1 = ModelCheckpoint(
    model_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

In [None]:
# Train the averaged ensemble for 20 epochs, validating on the last 20% of
# the shuffled training arrays and checkpointing the best epochs.
r4 = ensemble_model.fit(
    x=X_train_new,
    y=Y_train_new,
    epochs=20,
    validation_split=0.2,
    callbacks=[model_checkpoint_callbacke1],
)