In [25]:
# Data
import numpy as np
import pandas as pd
import os
import csv
import xml.etree.ElementTree as ET
import time

# Framework
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D, Lambda, MaxPool2D, BatchNormalization, LeakyReLU
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import TensorBoard

# Imaging
import cv2
import matplotlib.pyplot as plt
import seaborn as sns


from tqdm import tqdm
from tensorflow.keras.utils import to_categorical

# TF 1.x-style session setup: request at most 80% of GPU memory for this
# process so the notebook does not claim the whole card.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
session_config = tf.ConfigProto(gpu_options=gpu_options)
sess = tf.Session(config=session_config)

In [29]:
from zipfile import ZipFile
file_name = 'data.zip'

# Unpack the dataset archive into the working directory, but only when the
# extracted 'blood-cells' folder is not already there.
if not os.path.isdir('blood-cells'):
    # The handle is deliberately NOT called `zip`: binding that name at cell
    # level would shadow the built-in zip() for every later cell.
    with ZipFile(file_name, 'r') as archive:
        archive.extractall()
    print("Finish extracting\n")
else:
    print("Dataset was already extracted previously.\n")

Dataset was already extracted previously.



In [30]:
# Sanity check: show the compute devices TensorFlow can see on this machine.
# The bare call is left as the last expression so its return value is rendered
# as the cell output.
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 5625680157901428629, name: "/device:XLA_CPU:0"
 device_type: "XLA_CPU"
 memory_limit: 17179869184
 locality {
 }
 incarnation: 12915982380093527830
 physical_device_desc: "device: XLA_CPU device"]

In [33]:
# Lookup table from integer class label (1-based) to blood-cell type name.
# get_data() only ever produces labels 1-4; BASOPHIL (5) has no image folder there.
dict_characters = dict(enumerate(
    ['NEUTROPHIL', 'EOSINOPHIL', 'MONOCYTE', 'LYMPHOCYTE', 'BASOPHIL'],
    start=1,
))


In [35]:
from tqdm import tqdm
# from skimage.transform import resize
from tensorflow.keras.utils import to_categorical


def get_data(src_folder, img_size=(80, 80)):
    """Load all readable images from the per-class sub-folders of ``src_folder``.

    ``src_folder`` must contain one sub-folder per class name listed in
    ``names`` below. Every file in those folders is read with ``cv2.imread``
    and resized to ``img_size``; files OpenCV cannot read are skipped.

    Parameters
    ----------
    src_folder : str
        Directory holding the class sub-folders. A trailing path separator is
        optional.
    img_size : tuple of int, optional
        Target size passed to ``cv2.resize`` as ``(width, height)``.
        Defaults to ``(80, 80)``.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The stacked images and the matching integer labels. Labels are 1-based
        positions in ``names`` (NEUTROPHIL -> 1 ... LYMPHOCYTE -> 4).

    Notes
    -----
    ``cv2.imread`` returns colour images in BGR channel order; no colour
    conversion is applied here.
    """
    names = ['NEUTROPHIL', 'EOSINOPHIL', 'MONOCYTE', 'LYMPHOCYTE']
    imgs = []
    labels = []

    for label, name in enumerate(names, start=1):
        # Build the class directory with os.path.join so that src_folder works
        # with or without a trailing separator.
        class_dir = os.path.join(src_folder, name)

        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order reproducible across runs and machines.
        for img_name in tqdm(sorted(os.listdir(class_dir))):
            img_file = cv2.imread(os.path.join(class_dir, img_name))

            # imread signals an unreadable / non-image file by returning None.
            if img_file is None:
                continue

            imgs.append(np.asarray(cv2.resize(img_file, img_size)))
            labels.append(label)

    return np.asarray(imgs), np.asarray(labels)

# Read the TRAIN and TEST image folders into arrays.
train_dir = 'blood-cells/dataset2-master/images/TRAIN/'
test_dir = 'blood-cells/dataset2-master/images/TEST/'
X_train, y_train = get_data(train_dir)
X_test, y_test = get_data(test_dir)

# One-hot encode the labels. get_data() labels are 1-based (1..4), so with
# num_classes=5 column 0 of the encoded matrix is never set.
y_trainHot, y_testHot = (
    to_categorical(class_ids, num_classes=5) for class_ids in (y_train, y_test)
)

100%|██████████| 2499/2499 [00:01<00:00, 1523.36it/s]
100%|██████████| 2497/2497 [00:01<00:00, 1723.12it/s]
100%|██████████| 2478/2478 [00:01<00:00, 1735.05it/s]
100%|██████████| 2483/2483 [00:01<00:00, 1708.23it/s]
100%|██████████| 624/624 [00:00<00:00, 1558.60it/s]
100%|██████████| 623/623 [00:00<00:00, 1685.13it/s]
100%|██████████| 620/620 [00:00<00:00, 1720.60it/s]
100%|██████████| 620/620 [00:01<00:00, 487.18it/s] 


In [36]:
def plotHistogram(a, bgr=False, n_bins=30):
    """Show an image next to the histogram of its per-channel pixel intensities.

    Parameters
    ----------
    a : array-like, indexable as ``a[:, :, c]``
        Three-channel colour image (height x width x channel).
    bgr : bool, optional
        Set to True when ``a`` is in BGR channel order, which is what
        ``cv2.imread`` (used by ``get_data``) produces. The channels are then
        reversed so both the preview and the histogram colours are correct.
        Defaults to False (``a`` is already RGB), matching the previous
        behaviour.
    n_bins : int, optional
        Number of histogram bins per channel. Defaults to 30.

    Returns
    -------
    None
        The figure is drawn on a new matplotlib figure as a side effect.
    """
    # Reverse the channel axis for OpenCV images so index 0 is red, 2 is blue.
    rgb = a[:, :, ::-1] if bgr else a

    fig, (ax_img, ax_hist) = plt.subplots(1, 2, figsize=(10, 5))

    # Left panel: the image itself, without axis ticks.
    ax_img.imshow(rgb)
    ax_img.axis('off')

    # Right panel: one semi-transparent histogram per colour channel.
    ax_hist.set_ylabel('Count')
    ax_hist.set_xlabel('Pixel Intensity')
    for channel, colour in enumerate('rgb'):
        ax_hist.hist(rgb[:, :, channel].ravel(), bins=n_bins, lw=0,
                     color=colour, alpha=0.5)



In [37]:
# Normalize the dataset

def _scale_to_unit(images):
    """Return ``images`` as an array scaled from 0..255 down to 0..1.

    Only integer-typed arrays (the raw pixel data as loaded) are divided by
    255. An array that is already floating point is returned unchanged, so
    re-running this cell does not divide the data by 255 a second time.
    """
    images = np.asarray(images)
    if images.dtype.kind in ('u', 'i'):
        return images / 255.
    return images


X_train = _scale_to_unit(X_train)
X_test = _scale_to_unit(X_test)

# plotHistogram(X_train[1])
# print(X_train[1].size/3/80)

In [38]:
# Sizes that shape the network: one input image, and the width of a one-hot row.
image_shape = X_train[0].shape
num_category = len(y_trainHot[0])
print(image_shape)

# Every run logs to its own timestamped TensorBoard directory.
NAME = "logs/{}-{}".format("network", int(time.time()))
tensorboard = TensorBoard(log_dir=NAME)

# Three conv/pool stages followed by a small dense classifier head.
# (BatchNormalization after each Conv2D is currently disabled.)
model = Sequential([
    # Stage 1
    Conv2D(32, (3, 3), input_shape=image_shape),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 2
    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 3, with dropout before the classifier head
    Conv2D(64, (3, 3)),
    LeakyReLU(alpha=0.05),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.5),

    # Classifier head: flatten the 3D feature maps into a 1D feature vector
    Flatten(),
    Dense(64),
    LeakyReLU(alpha=0.05),
    Dropout(0.5),

    # One softmax output per one-hot column
    Dense(num_category),
    Activation('softmax'),
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Light augmentation applied on the fly to the training images only.
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)

# Train on augmented batches; the TEST arrays serve as validation data.
train_batches = datagen.flow(X_train, y_trainHot, batch_size=32)
history = model.fit_generator(
    train_batches,
    epochs=30,
    validation_data=(X_test, y_testHot),
    callbacks=[tensorboard],
)

(80, 80, 3)
Epoch 1/30

KeyboardInterrupt: 

In [0]:
# Score the trained model on the TEST arrays (the same data that was passed as
# validation_data during training) and report loss and accuracy.
loss_and_accuracy = model.evaluate(X_test, y_testHot)
validation_loss, validation_accuracy = loss_and_accuracy
print("Validation loss", validation_loss)
print("Validation accuracy:", validation_accuracy)


Validation loss 0.6766885824706166
Validation accuracy: 0.8242863


In [0]:
# Persist the trained model to 'my_model.h5' in the current working directory.
model.save('my_model.h5')
# To restore it later (NOTE: load_model is not imported in the visible cells;
# presumably it would come from tensorflow.keras.models — confirm before use):
# model = load_model('my_model.h5')
