***Importing required Libraries***

In [None]:
# Colab-only: mount Google Drive at /content/drive. The dataset folders read
# later in this notebook live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import json
import math
import os
import cv2
from PIL import Image
import numpy as np
from keras import layers
from keras.applications.densenet import DenseNet201
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Activation,Flatten
from keras.callbacks import Callback, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.optimizers import Adam
import matplotlib.pyplot as plt
from keras.models import Model
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score, accuracy_score
import scipy
from tqdm import tqdm
import tensorflow as tf
from keras import backend as K
import gc
from functools import partial
from sklearn import metrics
from collections import Counter
import json
import itertools

***Loading the dataset and applying the filters***

In [None]:
def Dataset_loader(DIR, RESIZE, sigmaX=10):
    """Load every PNG in ``DIR`` as a sharpened, denoised, resized RGB image.

    Each image is processed as: 7x7 Gaussian blur -> unsharp masking (the
    original weighted against its blurred copy) -> 5x5 median filter ->
    resize to ``RESIZE`` x ``RESIZE``.

    Args:
        DIR: Directory to scan (non-recursive).
        RESIZE: Edge length, in pixels, of the square output images.
        sigmaX: Unused. Kept so existing callers keep working; the Gaussian
            blur below uses a fixed sigma of 3.

    Returns:
        list of ``numpy.ndarray``: one ``(RESIZE, RESIZE, 3)`` array per PNG
        file, in sorted file-name order.
    """
    images = []
    # Sort so the load order is reproducible; os.listdir() order is arbitrary.
    for image_name in tqdm(sorted(os.listdir(DIR))):
        # Case-insensitive match so files such as "scan.PNG" are not silently skipped.
        if os.path.splitext(image_name)[1].lower() != ".png":
            continue
        path = os.path.join(DIR, image_name)
        # The context manager releases the file handle once the pixels are copied.
        with Image.open(path) as pil_img:
            img = np.asarray(pil_img.convert("RGB"))
        gauss = cv2.GaussianBlur(img, (7, 7), 3)  # low-pass copy for unsharp masking
        # Unsharp mask: boost the original and subtract the blurred copy.
        unsharp_image = cv2.addWeighted(img, 3, gauss, -1.9, 0)
        median_img = cv2.medianBlur(unsharp_image, 5)  # suppress speckle left by sharpening
        images.append(cv2.resize(median_img, (RESIZE, RESIZE)))
    return images
    

def _load_split(folder, size=120):
    """Load one class folder through Dataset_loader and stack it into an array."""
    return np.array(Dataset_loader(folder, size))


# One array per (split, class) folder on the mounted Drive.
benign_train = _load_split('/content/drive/MyDrive/Training/Benign')
malign_train = _load_split('/content/drive/MyDrive/Training/Malignant')
benign_test = _load_split('/content/drive/MyDrive/Testing/Benign')
malign_test = _load_split('/content/drive/MyDrive/Testing/Malignant')

***Creating a NumPy array of zeros to label the Benign images and a NumPy array of ones to label the Malignant images. The dataset is then shuffled, the labels are converted into categorical (one-hot) format and the data is normalized.***

In [None]:
img_size = 120  # edge length (pixels) of the square images produced by the loader

# Integer class labels: 0 = benign, 1 = malignant.
benign_train_label = np.zeros(len(benign_train))
malign_train_label = np.ones(len(malign_train))
benign_test_label = np.zeros(len(benign_test))
malign_test_label = np.ones(len(malign_test))

# Stack both classes into one array per split (benign first, then malignant).
x_train = np.concatenate((benign_train, malign_train), axis=0)
y_train = np.concatenate((benign_train_label, malign_train_label), axis=0)
x_test = np.concatenate((benign_test, malign_test), axis=0)
y_test = np.concatenate((benign_test_label, malign_test_label), axis=0)

# Shuffle images and labels with the SAME permutation so every image keeps its
# label. Both arrays must be indexed; assigning the shuffled labels to x_train
# would discard the images and leave y_train in class order.
train_order = np.random.permutation(len(x_train))
x_train = x_train[train_order]
y_train = y_train[train_order]

test_order = np.random.permutation(len(x_test))
x_test = x_test[test_order]
y_test = y_test[test_order]

# One-hot encode the labels: 0 -> [1, 0], 1 -> [0, 1].
y_train = to_categorical(y_train, num_classes=2)
y_test = to_categorical(y_test, num_classes=2)

# Scale pixel values from [0, 255] to [0, 1].
# No reshape is needed: the loader converts every image to RGB and resizes it,
# so the arrays are already laid out as (N, img_size, img_size, 3).
x_train = x_train / 255
x_test = x_test / 255

***In this step the dataset is split into two sets - train and test sets with 80% and 20% of the images respectively - and sample training images are displayed.***

In [None]:
columns = 4
rows = 3
fig = plt.figure(figsize=(15, 15))

# Show the first rows*columns training images (fewer if the set is smaller),
# each titled with the class decoded from its one-hot label.
for idx in range(min(columns * rows, len(x_train))):
    ax = fig.add_subplot(rows, columns, idx + 1)  # subplot positions are 1-based
    ax.set_title('Benign' if np.argmax(y_train[idx]) == 0 else 'Malignant')
    ax.imshow(x_train[idx], interpolation='nearest')
plt.show()

***Checking the number of images in Training and Testing.***

In [None]:
# Shape of the benign training array; the first axis is the number of images.
benign_train.shape

In [None]:
# Shape of the malignant training array; the first axis is the number of images.
malign_train.shape

In [None]:
# Shape of the benign test array; the first axis is the number of images.
benign_test.shape

In [None]:
# Shape of the malignant test array; the first axis is the number of images.
malign_test.shape

***Data Augmentation***

In [None]:
BATCH_SIZE = 16  # images per augmented batch

# Random augmentation applied on the fly to every batch drawn from the generator.
train_generator = ImageDataGenerator(
        # A float z makes Keras sample the zoom factor from [1 - z, 1 + z].
        # A value of 2 would give [-1, 3], which includes zero and negative
        # factors (degenerate or mirrored images), so a fractional range is used.
        # NOTE(review): 0.2 (+/-20% zoom) is a conventional choice - tune as needed.
        zoom_range=0.2,
        rotation_range = 90,  # rotate by up to +/-90 degrees
        horizontal_flip=True,  # randomly flip images
        vertical_flip=True,  # randomly flip images
    )

***In this step we use a CNN model to extract features, and the extracted features are then fed to the other models.***


In [None]:
# Small CNN used only as a feature extractor for the classical classifiers below.
# NOTE(review): no compile()/fit() call on this network is visible before
# predict(), so the features appear to come from randomly initialised weights -
# confirm that this is intended.
model = Sequential([
    Conv2D(16, (5, 5), padding='valid', input_shape=x_train.shape[1:]),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2), strides=2, padding='valid'),
    Dropout(0.4),
    Conv2D(32, (5, 5), padding='valid'),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2), strides=2, padding='valid'),
    Dropout(0.6),
    Conv2D(64, (5, 5), padding='valid'),
    Activation('relu'),
    Dropout(0.8),
    Flatten(),
    Dense(256, name="dense_1"),
    Activation('softmax'),
])

# Tap the network at "dense_1", i.e. the 256 values before the final softmax.
model_feat = Model(
    inputs=model.input,
    outputs=model.get_layer('dense_1').output,
)
feat_train = model_feat.predict(x_train)  # extracted features, one row per training image
feat_test = model_feat.predict(x_test)
print(feat_train)


 **1. Support Vector Machine**

In [None]:
# Support Vector Machine with an RBF kernel, trained on the CNN features.
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Only the settings that matter are spelled out; every other hyper-parameter
# is left at its scikit-learn default.
svclassifier = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42)
svclassifier.fit(feat_train, np.argmax(y_train, axis=1))  # one-hot -> class index
y_pred = svclassifier.predict(feat_test)

# Decode the one-hot test labels once and reuse them for every metric.
y_true = np.argmax(y_test, axis=1)
print('Accuracy: %.3f' % accuracy_score(y_true, y_pred))
print('Precision: %.3f' % precision_score(y_true, y_pred))
print('Recall: %.3f' % recall_score(y_true, y_pred))
print('F1 Score: %.3f' % f1_score(y_true, y_pred))


In [None]:
# Disabled experiment: SVM with a linear kernel. The code is kept inside a
# string literal, so nothing in it executes.
# NOTE(review): before re-enabling, note that the classifier is bound to
# `svclassifier` but the snippet calls `svm.fit` / `svm.score`, and `y_pred` is
# never recomputed, so the reports would describe an earlier model's predictions.
"""from sklearn.svm import SVC
svclassifier = SVC()
svclassifier = SVC(kernel='linear',random_state=42,C=100,cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto',
  max_iter=-1, probability=True, shrinking=True,
  tol=0.001, verbose=False)
svm.fit(feat_train,np.argmax(y_train,axis=1))

print("Accuracy of SVM (kernel = 'linear') is :",svm.score(feat_test,np.argmax(y_test,axis=1)))
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(np.argmax(y_test,axis=1),y_pred))
print(classification_report(np.argmax(y_test,axis=1),y_pred))"""

In [None]:
# Disabled experiment: SVM with a degree-3 polynomial kernel. The code is kept
# inside a string literal, so nothing in it executes.
# NOTE(review): before re-enabling, note that `svc` is created but never used,
# and `y_pred` is not recomputed from `svm`, so the confusion matrix and report
# would describe an earlier model's predictions.
"""from sklearn.svm import SVC
svc = SVC()
svm = SVC(kernel='poly',random_state=42,C=1000, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto',
  max_iter=-1, probability=True, shrinking=True,
  tol=0.001, verbose=False)
svm.fit(feat_train,np.argmax(y_train,axis=1))

print("Accuracy of SVM (kernel = 'polynomial') is :",svm.score(feat_test,np.argmax(y_test,axis=1)))
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(np.argmax(y_test,axis=1),y_pred))
print(classification_report(np.argmax(y_test,axis=1),y_pred))"""

In [None]:
# Disabled experiment: SVM with a sigmoid kernel. The code is kept inside a
# string literal, so nothing in it executes.
# NOTE(review): `svc` is created but never used in the snippet.
"""from sklearn.svm import SVC
svc = SVC()
svm = SVC(kernel='sigmoid',random_state=42,C=1000, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto',
  max_iter=-1, probability=True, shrinking=True,
  tol=0.001, verbose=False)
svm.fit(feat_train,np.argmax(y_train,axis=1))

print("Accuracy of SVM (kernel = 'sigmoid') is :",svm.score(feat_test,np.argmax(y_test,axis=1)))"""

Accuracy of SVM (kernel = 'sigmoid') is : 0.7250280583613917


**2. K Nearest Neighbor**

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# 3-nearest-neighbour vote, distance-weighted, using a Minkowski metric with p=5.
knn = KNeighborsClassifier(
    n_neighbors=3,
    weights='distance',
    algorithm='auto',
    leaf_size=30,
    p=5,
    metric='minkowski',
    metric_params=None,
    n_jobs=1,
)
knn.fit(feat_train, np.argmax(y_train, axis=-1))  # one-hot -> class index
y_pred = knn.predict(feat_test)

# Decode the one-hot test labels once, then score the predictions.
knn_true = np.argmax(y_test, axis=1)
print('Accuracy: %.3f' % accuracy_score(knn_true, y_pred))
print('Precision: %.3f' % precision_score(knn_true, y_pred))
print('Recall: %.3f' % recall_score(knn_true, y_pred))
print('F1 Score: %.3f' % f1_score(knn_true, y_pred))

**3. Random Forest Classifier**

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# 25-tree forest with a fixed seed so the fit is repeatable for a given input.
rfc = RandomForestClassifier(
    criterion='gini',
    min_samples_leaf=3,
    min_samples_split=7,
    n_estimators=25,
    n_jobs=-1,
    random_state=123,
)
rfc.fit(feat_train, np.argmax(y_train, axis=1))  # one-hot -> class index
y_pred = rfc.predict(feat_test)

# Decode the one-hot test labels once, then score the predictions.
rfc_true = np.argmax(y_test, axis=1)
print('Accuracy: %.3f' % accuracy_score(rfc_true, y_pred))
print('Precision: %.3f' % precision_score(rfc_true, y_pred))
print('Recall: %.3f' % recall_score(rfc_true, y_pred))
print('F1 Score: %.3f' % f1_score(rfc_true, y_pred))

**4. Logistic Regression**

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Logistic regression with scikit-learn's default settings.
logreg = LogisticRegression()
logreg.fit(feat_train, np.argmax(y_train, axis=1))  # one-hot -> class index
y_pred = logreg.predict(feat_test)

# Decode the one-hot test labels once, then score the predictions.
logreg_true = np.argmax(y_test, axis=1)
print('Accuracy: %.3f' % accuracy_score(logreg_true, y_pred))
print('Precision: %.3f' % precision_score(logreg_true, y_pred))
print('Recall: %.3f' % recall_score(logreg_true, y_pred))
print('F1 Score: %.3f' % f1_score(logreg_true, y_pred))

**5. Naive Bayes Classifier**

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Bound to its own name rather than `model`, so the Keras feature-extractor
# network defined earlier is not shadowed by a scikit-learn estimator.
gnb = GaussianNB(priors=None, var_smoothing=1e-09)
gnb.fit(feat_train, np.argmax(y_train, axis=1))  # one-hot -> class index
y_pred = gnb.predict(feat_test)

# Decode the one-hot test labels once and reuse them for every metric.
y_true = np.argmax(y_test, axis=1)
print('Accuracy: %.3f' % accuracy_score(y_true, y_pred))
print('Precision: %.3f' % precision_score(y_true, y_pred))
print('Recall: %.3f' % recall_score(y_true, y_pred))
print('F1 Score: %.3f' % f1_score(y_true, y_pred))

print("The Accuracy of Naive Bayes is :", gnb.score(feat_test, y_true))

**6. Proposed Methodology - CNN model (Framework - DenseNet201)**

In [None]:
def build_model(backbone, lr=1e-4):
    """Stack a two-class softmax head on ``backbone`` and compile the result.

    Head layout: global average pooling -> dropout(0.5) -> batch
    normalisation -> Dense(2, softmax).

    Args:
        backbone: Keras model used as the convolutional feature extractor; its
            output is passed through ``GlobalAveragePooling2D``.
        lr: Learning rate for the Adam optimizer.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(backbone)
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dropout(0.5))
    model.add(layers.BatchNormalization())
    model.add(layers.Dense(2, activation='softmax'))

    # NOTE(review): binary cross-entropy is paired here with a 2-unit softmax
    # and one-hot targets; categorical cross-entropy is the more conventional
    # choice for that output layout - confirm which is intended.
    model.compile(
        loss='binary_crossentropy',
        # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
        optimizer=Adam(learning_rate=lr),
        metrics=['accuracy']
    )
    return model

# ImageNet-pretrained DenseNet201 without its classification head, used as the
# backbone. (The variable is called `resnet`, but the network is a DenseNet.)
resnet = DenseNet201(include_top=False, weights='imagenet', input_shape=(120, 120, 3))

model = build_model(resnet, lr=1e-4)
model.summary()

***Training and Evaluation***

In [None]:
# Multiply the learning rate by 0.2 when validation accuracy has not improved
# for 5 epochs, never going below 1e-7.
learn_control = ReduceLROnPlateau(monitor='val_accuracy', patience=5,
                                  verbose=1,factor=0.2, min_lr=1e-7)

# Keep only the weights of the epoch with the best validation accuracy.
filepath="weights.best.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')

# NOTE(review): validation_data is the test split, so checkpoint selection and
# learning-rate scheduling are driven by the same data that is later used to
# report accuracy - consider a separate validation split.
# NOTE(review): fit_generator is deprecated in TF2-era Keras in favour of fit().
history = model.fit_generator(
    train_generator.flow(x_train, y_train, batch_size=BATCH_SIZE),
    # Whole number of batches covering every sample (the last one may be partial);
    # plain division yields a float, which is not a valid step count.
    steps_per_epoch=math.ceil(x_train.shape[0] / BATCH_SIZE),
    epochs=15,
    validation_data=(x_test, y_test),
    callbacks=[learn_control, checkpoint]
)

***Plotting the Graph***

In [None]:
# One row per epoch, one column per tracked metric.
history_df = pd.DataFrame(history.history)

# Training vs. validation loss.
loss_ax = history_df[['loss', 'val_loss']].plot(title='Loss per epoch')
loss_ax.set(xlabel='epoch', ylabel='loss')

# Training vs. validation accuracy.
acc_ax = history_df[['accuracy', 'val_accuracy']].plot(title='Accuracy per epoch')
acc_ax.set(xlabel='epoch', ylabel='accuracy')

plt.show()

In [None]:
# Append the per-epoch training history to a CSV file on the mounted Drive.
# NOTE(review): mode='a' appends on every run, so re-running this cell adds the
# same rows again. header='f' is a non-empty string rather than a bool or a
# list of column names - presumably a boolean was intended; confirm whether the
# header row should be written. The file name says "10epoch" while the training
# cell requests 15 epochs - confirm the intended name.
history_df.to_csv('/content/drive/MyDrive/10epoch.csv', mode='a',header='f')

***Prediction***

In [None]:
# Restore the weights written by the ModelCheckpoint callback (the epoch with
# the best validation accuracy) from "weights.best.hdf5".
model.load_weights("weights.best.hdf5")

In [None]:
# Class-probability predictions for every test image (one row per image, one
# column per class of the 2-unit softmax head).
Y_test_pred = model.predict(x_test)

In [None]:
# Accuracy on the test set: compare the argmax of the one-hot labels with the
# argmax of the predicted class probabilities.
print("The Acuuracy of CNN is :",accuracy_score(np.argmax(y_test, axis=1), np.argmax(Y_test_pred, axis=1)))

In [None]:
# Predictions on the un-augmented test images. The test array is named
# `x_test` in this notebook; no `X_test` is ever defined.
Y_pred = model.predict(x_test)

In [None]:
tta_steps = 10  # number of augmented passes over the test set
predictions = []

# Test-time augmentation: predict on randomly augmented copies of the test
# images several times and average the class probabilities. shuffle=False keeps
# the rows of every pass aligned with x_test.
tta_batches = math.ceil(len(x_test) / BATCH_SIZE)  # whole batches covering every image
for tta_round in tqdm(range(tta_steps)):
    preds = model.predict_generator(
        train_generator.flow(x_test, batch_size=BATCH_SIZE, shuffle=False),
        steps=tta_batches,
    )
    predictions.append(preds)
    gc.collect()  # free per-pass buffers before the next pass

Y_pred_tta = np.mean(predictions, axis=0)

***Classification Report***

In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 of the TTA-averaged predictions. The labels
# array is named `y_test` in this notebook; no `Y_test` is ever defined.
print(classification_report(np.argmax(y_test, axis=1), np.argmax(Y_pred_tta, axis=1)))

In [None]:

# Decode one-hot labels and TTA-averaged probabilities to class indices.
true_classes = np.argmax(y_test, axis=1)
pred_classes = np.argmax(Y_pred_tta, axis=1)
is_correct = true_classes == pred_classes

# Indices of the first 8 correctly and the first 8 incorrectly classified test images.
prop_class = np.flatnonzero(is_correct)[:8].tolist()
mis_class = np.flatnonzero(~is_correct)[:8].tolist()

# Figure for the correctly classified examples: 2 rows x 4 columns = 8 slots.
fig = plt.figure(figsize=(18, 10))
columns = 4
rows = 2

def Transfername(namecode):
    """Map a class index to its display label: 0 -> "Benign", anything else -> "Malignant"."""
    return "Benign" if namecode == 0 else "Malignant"
    
# One subplot per selected test image, titled with the predicted and actual class.
# The test arrays are named `x_test` / `y_test` in this notebook.
for slot, test_idx in enumerate(prop_class, start=1):  # subplot positions are 1-based
    ax = fig.add_subplot(rows, columns, slot)
    ax.set_title("Predicted result:"+ Transfername(np.argmax(Y_pred_tta[test_idx]))
                       +"\n"+"Actual result: "+ Transfername(np.argmax(y_test[test_idx])))
    ax.imshow(x_test[test_idx], interpolation='nearest')
plt.show()

**Save the complete model and re-train it**

In [None]:
# Save the model to 'my_model.h5' in the current working directory.
model.save('my_model.h5')

In [None]:
# Reload the saved model under its own name. The evaluation and re-training
# cells that follow operate on `new_model`, so the loaded model must be bound
# to that name (binding it to `model` leaves `new_model` undefined).
new_model = tf.keras.models.load_model('my_model.h5')
new_model.summary()

In [None]:
# Evaluate `new_model` on the test arrays; Keras returns the loss followed by
# any metrics the model was compiled with.
new_model.evaluate(x_test,y_test)

In [None]:

# Continue training `new_model` for 3 more epochs on augmented batches, reusing
# the learning-rate schedule and checkpoint callbacks defined for the first run.
# NOTE(review): fit_generator is deprecated in TF2-era Keras in favour of fit().
history = new_model.fit_generator(
    train_generator.flow(x_train, y_train, batch_size=BATCH_SIZE),
    # Whole number of batches covering every sample (the last one may be partial);
    # plain division yields a float, which is not a valid step count.
    steps_per_epoch=math.ceil(x_train.shape[0] / BATCH_SIZE),
    epochs=3,
    validation_data=(x_test, y_test),
    callbacks=[learn_control, checkpoint]
)