In [1]:
import pandas as pd
import numpy as np
import os
import cv2
from keras.layers import Dense, Input, InputLayer, Flatten, Conv2D, MaxPool2D
from keras.models import Sequential, Model
from sklearn.model_selection import train_test_split


In [None]:
# First, we build the image dataset from the given folders. Every file other than the image folders was deleted beforehand to avoid errors while loading.
# Images could be RGB or grayscale; here they are read as grayscale.
# The images are resized to a common size.
# Classes are one-hot encoded as [benign, malignant].

In [2]:
# The benign and malignant images come in different sizes, so every image is
# resized to one common size (110 is roughly the average size in the dataset).
IMG_HEIGHT = 110
IMG_WIDTH = 110

# One-hot label for each class sub-folder, in the order [benign, malignant].
CLASS_LABELS = {"benign": [1, 0], "malignant": [0, 1]}


def create_dataset(img_folder):
    """Load the benign and malignant images into one labelled dataset.

    Parameters
    ----------
    img_folder : str
        Directory containing a ``benign`` and/or a ``malignant`` sub-folder
        with the image files. Any other entry in ``img_folder`` is ignored.

    Returns
    -------
    img_data_array : list of numpy.ndarray
        One float32 array of shape (IMG_HEIGHT, IMG_WIDTH) per image, read as
        grayscale and scaled to the range [0, 1].
    class_name : list of list of int
        One-hot label per image: ``[1, 0]`` for benign, ``[0, 1]`` for
        malignant, in the same order as ``img_data_array``.

    Notes
    -----
    Files that OpenCV cannot decode are skipped with a warning instead of
    crashing inside ``cv2.resize``.
    """
    import warnings

    img_data_array = []
    class_name = []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded train/test split) reproducible.
    for dir1 in sorted(os.listdir(img_folder)):
        label = CLASS_LABELS.get(dir1)
        class_dir = os.path.join(img_folder, dir1)
        # Skip stray files and folders that are not one of the two classes.
        if label is None or not os.path.isdir(class_dir):
            continue

        for file in sorted(os.listdir(class_dir)):
            image_path = os.path.join(class_dir, file)
            image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            if image is None:
                # cv2.imread signals an unreadable / non-image file with None.
                warnings.warn("Skipping unreadable image file: " + image_path)
                continue

            # cv2.resize expects dsize as (width, height).
            image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT),
                               interpolation=cv2.INTER_AREA)
            # Scale the 8-bit pixel values to [0, 1] as float32 for the network.
            image = image.astype('float32') / 255.0

            img_data_array.append(image)
            class_name.append(list(label))  # fresh list per sample

    return img_data_array, class_name

In [3]:
# Load the breast cancer image dataset from the local 'originals' folder.
# The data can be obtained from https://data.mendeley.com/datasets/wmy84gzngw/1
image_data, class_name = create_dataset(img_folder=r'originals')

In [None]:
# Shape of the loaded data (the list of images is viewed as one array).
np.asarray(image_data).shape

In [None]:
# Preview only the first image: displaying the whole list floods the output.
image_data[:1]

In [None]:
# Conversion: list ---> numpy.ndarray (do not forget to reshape as shown below; the parameters depend on the data)

In [4]:
# Stack the images into a single array and add a trailing channel axis so the
# data has the (samples, height, width, channels) layout that Conv2D expects.
# The size comes from the shared constants instead of a repeated literal 110.
image_data = np.array(image_data).reshape(len(image_data), IMG_HEIGHT, IMG_WIDTH, 1)

In [None]:
type(image_data)     # confirm the data is now a numpy.ndarray rather than a list

In [None]:
# Check the shape again after the reshape: (samples, height, width, channels).
image_data.shape

In [None]:
# train/test split

In [5]:
# Keep 75% of the samples for training and the rest for testing; the fixed
# random_state makes the shuffle repeatable.
trainX, testX, trainY, testY = train_test_split(
    image_data,
    class_name,
    train_size=0.75,
    random_state=40,
)

In [None]:
# Create model:

In [6]:
model = Sequential()  # start with an empty Sequential model; layers are added one by one in the cells below

In [None]:
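# Unused alternative: declare the input with an explicit InputLayer.
# Note: the shape below (300,300,1) does not match the 110x110x1 images used in this notebook.
#model.add(InputLayer(input_shape=(300,300,1)))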

In [7]:
# Convolutional layer: 50 filters of size 3x3 with stride 1 and ReLU activation.
# The input shape is taken from the shared image-size constants (one grayscale
# channel) so it cannot drift away from the size used when loading the data.
model.add(Conv2D(50, kernel_size=(3, 3), strides=(1, 1), activation='relu',
                 input_shape=(IMG_HEIGHT, IMG_WIDTH, 1)))

In [8]:
# Max-pooling layer with a 3x3 window to shrink the feature maps.
model.add(MaxPool2D(pool_size=3))

In [9]:
model.add(Flatten())     # flatten the pooled feature maps into one vector per sample before the Dense layers

In [10]:
# Fully connected hidden layer with 700 units and ReLU activation.
model.add(Dense(units=700, activation='relu'))

In [11]:
# Output layer: one unit per class (2), with softmax to produce class probabilities.
model.add(Dense(units=2, activation='softmax'))

In [12]:
# Categorical cross-entropy matches the one-hot labels; Adam is used as the
# optimizer here, but other optimizers are worth trying.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# model fit:

In [13]:
# Train for 10 epochs in batches of 64; the held-out test split is passed as
# validation data so its loss/accuracy is reported after every epoch.
model.fit(
    x=np.array(trainX),
    y=np.array(trainY),
    batch_size=64,
    epochs=10,
    validation_data=(np.array(testX), np.array(testY)),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x200c4e87220>

In [14]:
# Predict class probabilities for the held-out test images.
preds = model.predict(x=testX, verbose=1)



In [15]:
from sklearn.metrics import confusion_matrix, classification_report

# Collapse the one-hot ground truth and the softmax outputs to class indices
# (0 = benign, 1 = malignant) before building the report.
true_classes = np.argmax(testY, axis=-1)
predicted_classes = np.argmax(preds, axis=-1)
print(classification_report(true_classes, predicted_classes))

              precision    recall  f1-score   support

           0       0.95      0.90      0.92        20
           1       0.95      0.98      0.97        43

    accuracy                           0.95        63
   macro avg       0.95      0.94      0.94        63
weighted avg       0.95      0.95      0.95        63

