In [1]:
import pandas as pd
import numpy as np
import os
from glob import glob
import random
import matplotlib.pylab as plt
import keras.backend as K
from sklearn.model_selection import train_test_split
import tensorflow as tf
import keras
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential
from tensorflow.keras.optimizers import SGD, RMSprop, Adam, Adagrad, Adadelta
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization, Conv2D, MaxPool2D, MaxPooling2D
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import VGG16
%matplotlib inline
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D

# to ignore displaying warnings
import warnings
warnings.filterwarnings("ignore")


In [2]:
# Collect every PNG patch below the dataset root (recursing into nested
# sub-folders such as 10253/0/). glob() returns paths in an OS-dependent
# order, so sort them: the seeded shuffle and the seeded train/test split
# further down are only repeatable if the input order is.
imagePatches = sorted(glob('IDC_regular_ps50_idx5/**/*.png', recursive=True))

# Preview the first ten paths as a sanity check.
for filename in imagePatches[0:10]:
    print(filename)

IDC_regular_ps50_idx5\10253\0\10253_idx5_x1001_y1001_class0.png
IDC_regular_ps50_idx5\10253\0\10253_idx5_x1001_y1051_class0.png
IDC_regular_ps50_idx5\10253\0\10253_idx5_x1001_y1101_class0.png
IDC_regular_ps50_idx5\10253\0\10253_idx5_x1001_y1151_class0.png
IDC_regular_ps50_idx5\10253\0\10253_idx5_x1001_y1201_class0.png
IDC_regular_ps50_idx5\10253\0\10253_idx5_x1001_y1251_class0.png
IDC_regular_ps50_idx5\10253\0\10253_idx5_x1001_y1301_class0.png
IDC_regular_ps50_idx5\10253\0\10253_idx5_x1001_y1351_class0.png
IDC_regular_ps50_idx5\10253\0\10253_idx5_x1001_y1501_class0.png
IDC_regular_ps50_idx5\10253\0\10253_idx5_x1001_y1551_class0.png


In [3]:
# Partition the patch paths into two lists by the class encoded in the file name.

# Paths whose name ends in "class0.png" (0 = no cancer).
class0_no = [path for path in imagePatches if path.endswith("class0.png")]
# Every remaining path is treated as class 1 (1 = cancer).
class1_cancer = [path for path in imagePatches if not path.endswith("class0.png")]

In [4]:
# Class balance check: number of cancer patches, then number of non-cancer patches.
print(len(class1_cancer), len(class0_no), sep="\n")

78785
198737


In [5]:
# Side length (in pixels) that every patch is resized to; the same value is
# used for the model's input shape.
img_size = 50

In [6]:
from matplotlib.image import imread
import cv2

def get_image_arrays(data, label):
    """Load image patches from disk and pair each one with a class label.

    Parameters
    ----------
    data : iterable of str
        File paths. Entries that do not end in '.png' are ignored.
    label : int
        Class label attached to every image loaded from ``data``.

    Returns
    -------
    list
        One ``[image, label]`` pair per successfully loaded file, where
        ``image`` is the result of ``cv2.resize`` to
        ``(img_size, img_size)`` (``img_size`` is the notebook-level constant).
        Files that OpenCV cannot read are reported and skipped.
    """
    img_arrays = []
    for path in data:
        if not path.endswith('.png'):
            continue
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread signals a missing or undecodable file by returning
            # None instead of raising; handing that to cv2.resize would abort
            # the whole load with an opaque OpenCV error.
            print(f"Skipping unreadable image: {path}")
            continue
        img_sized = cv2.resize(img, (img_size, img_size), interpolation=cv2.INTER_LINEAR)
        img_arrays.append([img_sized, label])
    return img_arrays

In [7]:
# Load every patch into memory, labelled 0 (no cancer) and 1 (cancer) respectively.
class0_array = get_image_arrays(class0_no, 0)
class1_array = get_image_arrays(class1_cancer, 1)

In [8]:
# Sanity check: read one specific patch in colour mode and display the shape
# of the array OpenCV returns for it.
test = cv2.imread('IDC_regular_ps50_idx5/13689/1/13689_idx5_x801_y1501_class1.png' ,cv2.IMREAD_COLOR)
test.shape

(50, 50, 3)

In [9]:
# Merge both classes into one list of [image, label] pairs and shuffle it.
# Keep this a plain Python list rather than a NumPy array:
#  * np.concatenate would first have to convert the ragged [image, label]
#    pairs into an object array, which recent NumPy releases refuse to do;
#  * random.shuffle swaps items with `x[i], x[j] = x[j], x[i]`. On a 2-D
#    NumPy array the rows are views, so the swap overwrites one row with the
#    other, silently duplicating some samples and losing others.
combined_data = class0_array + class1_array
random.seed(42)  # fixed seed so the shuffled order is repeatable
random.shuffle(combined_data)

In [10]:
# Split the shuffled [image, label] pairs into parallel feature and label lists.
X = [features for features, _ in combined_data]
y = [label for _, label in combined_data]

In [11]:
# Stack the per-image arrays into a single array laid out as
# (n_samples, img_size, img_size, 3); -1 lets NumPy infer n_samples.
# NOTE(review): pixel values are passed on exactly as loaded — no scaling or
# VGG16 preprocess_input step is visible in this notebook; confirm that is intended.
X = np.array(X).reshape(-1, img_size, img_size, 3)

In [12]:
# Display the shape of the stacked image array.
X.shape

(277522, 50, 50, 3)

In [13]:
# Hold out 25% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# One-hot encode the integer class labels.
y_train, y_test = to_categorical(y_train), to_categorical(y_test)

# Report the shapes of the four resulting arrays.
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(208141, 50, 50, 3) (69381, 50, 50, 3) (208141, 2) (69381, 2)


In [14]:
# Convolutional base: VGG16 with ImageNet weights and without its classifier head.
base_model = VGG16(weights='imagenet', include_top=False,
                   input_shape=(img_size, img_size, 3))

# freeze extraction layers
base_model.trainable = False

# add custom top layers
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.2)(x)
x = Dense(4096, activation="relu")(x)
x = Dense(4096, activation="relu")(x)
x = Dropout(0.2)(x)
# NOTE(review): 2096 may be a typo for 2048 — left unchanged, please confirm.
x = Dense(2096, activation="relu")(x)
# The labels are one-hot encoded and the model is compiled with categorical
# cross-entropy, so the two outputs must form a probability distribution over
# the classes: use softmax rather than two independent sigmoids.
predictions = Dense(2, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)

# confirm unfrozen layers
for layer in model.layers:
    if layer.trainable:
        print(layer)

<keras.src.layers.pooling.global_average_pooling2d.GlobalAveragePooling2D object at 0x0000028DB57DE0A0>
<keras.src.layers.regularization.dropout.Dropout object at 0x0000028DB6153760>
<keras.src.layers.core.dense.Dense object at 0x0000028DB613B9D0>
<keras.src.layers.core.dense.Dense object at 0x0000028DB60C9580>
<keras.src.layers.regularization.dropout.Dropout object at 0x0000028DB612CB80>
<keras.src.layers.core.dense.Dense object at 0x0000028DB6162970>
<keras.src.layers.core.dense.Dense object at 0x0000028DB618DFA0>


In [15]:
# Model Summary
# Prints each layer with its output shape and parameter count.

model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 50, 50, 3)]       0         
                                                                 
 block1_conv1 (Conv2D)       (None, 50, 50, 64)        1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 50, 50, 64)        36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 25, 25, 64)        0         
                                                                 
 block2_conv1 (Conv2D)       (None, 25, 25, 128)       73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 25, 25, 128)       147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 12, 12, 128)       0     

In [16]:
# Training callbacks:
#  - stop once val_loss has not improved for 5 consecutive epochs;
#  - write the model to 'VGG16_model.hdf5', keeping only the best one seen so far.
callbacks = [
    EarlyStopping(monitor='val_loss', patience=5, verbose=1),
    ModelCheckpoint('VGG16_model.hdf5', save_best_only=True),
]

In [17]:
# Adam optimiser with an explicit learning rate.
opt = Adam(learning_rate=0.001)

# Categorical cross-entropy loss, with accuracy as the only tracked metric.
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train for at most 10 epochs and keep the History object for plotting.
# NOTE(review): X_test / y_test are passed as validation data, so the
# EarlyStopping and ModelCheckpoint callbacks are steered by the same set that
# is later used for the final evaluation — consider a separate validation split.
VGG16_history = model.fit(
    X_train,
    y_train,
    epochs=10,
    verbose=1,
    validation_data=(X_test, y_test),
    callbacks=callbacks,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Evaluate on the held-out set; index 1 of the result is the accuracy metric
# the model was compiled with (index 0 is the loss).
evaluation_results = model.evaluate(X_test, y_test)
accuracy = evaluation_results[1]

# Convert predicted scores and one-hot targets back to class indices.
y_pred = model.predict(X_test)
y_test_classes = np.argmax(y_test, axis=1)
y_pred_classes = np.argmax(y_pred, axis=1)

# Support-weighted averages over both classes. f1_score here is the
# scikit-learn function imported at the top of this cell; a later cell
# defines a Keras-backend function with the same name.
f1 = f1_score(y_test_classes, y_pred_classes, average='weighted')
recall = recall_score(y_test_classes, y_pred_classes, average='weighted')
precision = precision_score(y_test_classes, y_pred_classes, average='weighted')

for heading, value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 score:", f1),
):
    print(heading, value)

In [None]:
def f1_score(y_true, y_pred):
    """F1 score built from Keras backend ops (taken from old Keras source code).

    Both inputs are clipped to [0, 1] and rounded before counting, and the
    counts are summed over every element of the tensors. ``K.epsilon()`` is
    added to each denominator to avoid division by zero.

    Note: this definition rebinds the name ``f1_score`` that an earlier cell
    imports from ``sklearn.metrics``.

    Parameters
    ----------
    y_true : tensor of ground-truth targets.
    y_pred : tensor of predicted scores, same shape as ``y_true``.

    Returns
    -------
    Scalar tensor holding the F1 value.
    """
    eps = K.epsilon()
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + eps)
    recall = true_positives / (possible_positives + eps)
    return 2 * (precision * recall) / (precision + recall + eps)

In [None]:
# Metric collection: built-in Keras metrics plus the backend f1_score function.
# NOTE(review): the model.compile call visible in this notebook passes
# metrics=['accuracy'], not this list — confirm where METRICS is meant to be used.
METRICS = [
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
    tf.keras.metrics.AUC(name='auc'),
    f1_score,
]

In [None]:
# Plot the recorded training history: accuracy on top, loss underneath.
plt.figure(figsize=(12, 6))

# Top panel: training vs. validation accuracy per epoch.
plt.subplot(2, 1, 1)
for history_key, curve_label in (('accuracy', "train_acc"), ('val_accuracy', "test_acc")):
    plt.plot(VGG16_history.history[history_key], label=curve_label)
plt.legend()

# Bottom panel: training vs. validation loss per epoch.
plt.subplot(2, 1, 2)
for history_key, curve_label in (('loss', "train_loss"), ('val_loss', "val_loss")):
    plt.plot(VGG16_history.history[history_key], label=curve_label)
plt.legend()

In [None]:
import matplotlib.pyplot as plt
from matplotlib import gridspec

# Training accuracy, validation accuracy, training loss and validation loss.
# NOTE(review): these values are typed in by hand rather than read from
# VGG16_history, so this figure will not follow a re-run of the training cell.
train_acc = [0.9552, 0.9552, 0.9559, 0.9558, 0.9555, 0.9557]
val_acc = [0.9538, 0.9539, 0.9564, 0.9519, 0.9551, 0.9538]
train_loss = [0.1742, 0.1645, 0.1513, 0.1654, 0.1576, 0.1708]
val_loss = [0.1416, 0.1433, 0.1454, 0.1421, 0.1437, 0.1487]

# Epoch numbers (1-based) for the x-axis.
epochs = range(1, len(train_acc) + 1)

# Create the figure and a 2-row, 1-column layout with a 3:2 height ratio.
fig = plt.figure(figsize=(12, 6))
gs = gridspec.GridSpec(2, 1, height_ratios=[3, 2])

# First subplot: accuracy.
acc_ax = fig.add_subplot(gs[0])
acc_ax.plot(epochs, train_acc, 'b', label='Train Accuracy')
acc_ax.plot(epochs, val_acc, 'orange', label='Validation Accuracy')
acc_ax.set_title('Training and Validation Accuracy')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()

# Second subplot: loss.
loss_ax = fig.add_subplot(gs[1])
loss_ax.plot(epochs, train_loss, 'b', label='Train Loss')
loss_ax.plot(epochs, val_loss, 'orange', label='Validation Loss')
loss_ax.set_title('Training and Validation Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

fig.tight_layout()  # adjust subplot spacing
plt.show()


In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Predicted and true class indices for the test set.
Y_pred = model.predict(X_test)
Y_pred_classes = np.argmax(Y_pred, axis=1)
Y_true = np.argmax(y_test, axis=1)

# Rows are true classes, columns are predicted classes.
confusion_mtx = confusion_matrix(Y_true, Y_pred_classes)

f, ax = plt.subplots(figsize=(8, 8))
# The cells hold integer sample counts, so annotate them with an integer
# format; '.1f' rendered every count with a spurious ".0".
sns.heatmap(confusion_mtx, annot=True, linewidths=0.01, cmap="OrRd",
            linecolor="black", fmt='d', ax=ax)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()
