<a href="https://colab.research.google.com/github/victoralcantara75/erythrocytes-classification/blob/feature-extraction/TCC_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#math
import numpy as np
import sklearn
import skimage
import sklearn.model_selection

#tensorflow and keras
import tensorflow as tf 
from tensorflow import keras
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.mobilenet import preprocess_input
from tensorflow.keras.applications.resnet50 import ResNet50
from keras.preprocessing.image import ImageDataGenerator

#dimensionality
from sklearn.decomposition import PCA

#classifiers
from sklearn.svm import SVC

#visualization
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

#utils
import os
import imageio

In [None]:
# Dataset: clone the repository holding the 5-fold train/test image splits.
# This creates ./train-test-erythrocytes in the working directory, which is
# the root that loadDir() builds its paths from.
!git clone https://github.com/victoralcantara75/train-test-erythrocytes.git

Cloning into 'train-test-erythrocytes'...
remote: Enumerating objects: 769, done.[K
remote: Counting objects: 100% (769/769), done.[K
remote: Compressing objects: 100% (565/565), done.[K
remote: Total 769 (delta 203), reused 766 (delta 203), pack-reused 0[K
Receiving objects: 100% (769/769), 738.18 KiB | 19.95 MiB/s, done.
Resolving deltas: 100% (203/203), done.


In [None]:
# Class folder names. The feature-extraction functions use a sample's index in
# this list as its integer label, so the order here defines the label encoding.
classes = ["circular", "falciforme", "outras"]
# Batch size passed to flow_from_directory() in loadGenerator().
batch = 16
# NOTE(review): epochs / opt / lr are not referenced by any cell visible in
# this section -- presumably training settings used elsewhere; confirm or drop.
epochs = 30
opt = 'adam'
lr = 0.001

In [None]:
def loadDir(round):
  """Return the (train_dir, test_dir) paths of one cross-validation round.

  round: round identifier; it is converted with str() and appended to
         'round_' to select the folder under the 5-fold dataset tree.
  """
  round_root = './train-test-erythrocytes/dataset/5-fold/round_' + str(round)
  return tuple(round_root + '/' + split for split in ('train', 'test'))

In [None]:
def loadGenerator(train_dir, test_dir):
  """Build Keras directory iterators for the train and test image folders.

  Training images are multiplied by 1/255 and randomly sheared, zoomed and
  flipped (vertically and horizontally); test images are only multiplied by
  1/255. Both iterators use the module-level `batch` as batch size.

  NOTE(review): no target_size is passed to flow_from_directory, so the Keras
  default image size applies -- confirm it matches the consumer of the batches.

  Returns:
    (train_generator, test_generator)
  """
  augmentation = dict(
      shear_range=0.2,
      zoom_range=0.2,
      vertical_flip=True,
      horizontal_flip=True)

  train_generator = ImageDataGenerator(rescale=1. / 255, **augmentation).flow_from_directory(
      train_dir, batch_size=batch)
  test_generator = ImageDataGenerator(rescale=1. / 255).flow_from_directory(
      test_dir, batch_size=batch)

  return train_generator, test_generator

In [None]:
def createModel():
  """Build a feature extractor from an ImageNet-pretrained ResNet50.

  The full network (include_top=True) is instantiated for 224x224x3 inputs and
  then truncated: the returned model maps the same input to the output of the
  layer named "avg_pool".
  """
  resnet = ResNet50(input_shape=(224, 224, 3), include_top=True, weights='imagenet')
  return tf.keras.Model(inputs=resnet.input, outputs=resnet.get_layer("avg_pool").output)

In [None]:
def extract_features_test(path, model):
  
  x_list = []
  y_list = []

  for label in range(3):    
    folder_path = os.path.join(path, classes[label])
    for file in os.listdir(folder_path):    
        file_path = os.path.join(folder_path, file)
        
        if not(file.endswith(".jpg")):
            continue
        
        # load image
        img = image.load_img(file_path, target_size=(224,224))
        # convert image to numpy array
        img_arr = image.img_to_array(img)
        # add 1 more dimension
        img_arr_b = np.expand_dims(img_arr, axis=0)
        # preprocess image
        input_img = preprocess_input(img_arr_b)
        # extract feature
        features = model.predict(input_img)

        x_list.append(features.ravel())
        y_list.append(label)

  return x_list, y_list

In [None]:
def extract_features_train(path, model):
  
  x_list = []
  y_list = []

  for label in range(3):    
    folder_path = os.path.join(path, classes[label])
    for file in os.listdir(folder_path):    
        file_path = os.path.join(folder_path, file)
        
        if not(file.endswith(".jpg")):
            continue
        
        # load image
        img = image.load_img(file_path, target_size=(224,224))
        # convert image to numpy array
        img_arr = image.img_to_array(img)
        # add 1 more dimension
        img_arr_b = np.expand_dims(img_arr, axis=0)
        # preprocess image
        input_img = preprocess_input(img_arr_b)
        #data augmentation
        da = []
        img_vertical_flip = np.flipud(input_img)
        img_horizontal_flip = np.fliplr(input_img)
        da.append(input_img)
        da.append(img_vertical_flip)
        da.append(img_horizontal_flip)
        # extract feature
        for data in da:
          features = model.predict(data)
          x_list.append(features.ravel())
          y_list.append(label)

  return x_list, y_list

In [None]:
def svm_grid_search(C, kernel, train_X, train_Y, cv=5):
    """Pick the SVC regularization value with the best cross-validated accuracy.

    Each candidate is scored on held-out folds of the training data rather
    than on the samples it was fitted on; accuracy on the fitting data is an
    optimistic estimate and is not a sound basis for choosing C.

    Args:
        C: iterable of candidate regularization values.
        kernel: kernel name passed to SVC.
        train_X: training feature matrix, one row per sample.
        train_Y: training labels (any array-like; flattened before use).
        cv: number of cross-validation folds (or any value accepted by
            cross_val_score's `cv` argument).

    Returns:
        The candidate from C with the highest mean fold accuracy; the first
        one wins on ties.

    Raises:
        ValueError: if C is empty.
    """
    from sklearn.model_selection import cross_val_score

    C = list(C)
    if not C:
        raise ValueError("C must contain at least one candidate value")

    labels = np.asarray(train_Y).ravel()
    accuracy_score_list = []

    for c in C:
        svmClassifier = SVC(C=c, kernel=kernel)
        # Mean accuracy over the held-out folds
        fold_scores = cross_val_score(svmClassifier, train_X, labels, cv=cv, scoring='accuracy')
        accuracy = fold_scores.mean()
        accuracy_score_list.append(accuracy)
        print('Regularization parameters: ', c, 'Accuracy', accuracy)

    max_accuracy_id = accuracy_score_list.index(max(accuracy_score_list))
    return C[max_accuracy_id]

In [None]:
# Per-round accuracies; nothing in this cell appends to it.
accs = []

# range(1, 2) visits round 1 only.
for i in range(1, 2):

  print("ROUND ", i)
  train_dir, test_dir = loadDir(i)

  # Feature extractor used for both splits of this round.
  model = createModel()

  # Feature vectors and integer labels for each split.
  X_list_train, Y_list_train = extract_features_train(train_dir, model)
  X_list_test, Y_list_test = extract_features_test(test_dir, model)

  # Lists -> float32 arrays (labels are stored as float32 as well).
  train_imgs, train_labels = (
      np.asarray(values, dtype=np.float32) for values in (X_list_train, Y_list_train))
  test_imgs, test_labels = (
      np.asarray(values, dtype=np.float32) for values in (X_list_test, Y_list_test))

  print("Shape of train_X", train_imgs.shape, sep="\n")
  print("\nShape of test_X", test_imgs.shape, sep="\n")



ROUND  1
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels.h5
Shape of train_X
(1500, 2048)

Shape of test_X
(126, 2048)


In [None]:
  # Learn the projection from the training features only and apply that same
  # projection to the test features. Refitting on the test set would put the
  # two splits in different bases (and give them different numbers of
  # components), so a model fitted on one could not be applied to the other.
  # PCA() is called without n_components, so no component is discarded.
  pca = PCA()
  pca.fit(train_imgs)
  reduc_features_train = pca.transform(train_imgs)
  reduc_features_test = pca.transform(test_imgs)

  print(reduc_features_train.shape)
  print(reduc_features_test.shape)

(1500, 126)
(126, 126)


In [None]:
# Candidate regularization values 0.1 * i for i = 1..29, tried with a linear kernel.
kernel = 'linear'
C = [0.1 * step for step in range(1, 30)]
# Keep the candidate that svm_grid_search ranks best on the PCA-projected training features.
opt_c = svm_grid_search(C, kernel, reduc_features_train, train_labels)

Regularization parameters:  0.1 Accuracy 0.5226666666666666
Regularization parameters:  0.2 Accuracy 0.6093333333333333
Regularization parameters:  0.30000000000000004 Accuracy 0.6246666666666667
Regularization parameters:  0.4 Accuracy 0.6426666666666667
Regularization parameters:  0.5 Accuracy 0.6573333333333333
Regularization parameters:  0.6000000000000001 Accuracy 0.674
Regularization parameters:  0.7000000000000001 Accuracy 0.692
Regularization parameters:  0.8 Accuracy 0.6933333333333334
Regularization parameters:  0.9 Accuracy 0.706
Regularization parameters:  1.0 Accuracy 0.7126666666666667
Regularization parameters:  1.1 Accuracy 0.718
Regularization parameters:  1.2000000000000002 Accuracy 0.7233333333333334
Regularization parameters:  1.3 Accuracy 0.7306666666666667
Regularization parameters:  1.4000000000000001 Accuracy 0.734
Regularization parameters:  1.5 Accuracy 0.738
Regularization parameters:  1.6 Accuracy 0.7426666666666667
Regularization parameters:  1.700000000000

In [None]:
  # Final linear SVM for the round, using the C selected by the grid search.
  # NOTE(review): this fits and predicts on train_imgs / test_imgs, not on
  # reduc_features_train / reduc_features_test, so the PCA outputs are unused
  # here -- confirm whether that is intended.
  clf = SVC(kernel='linear', C=opt_c)
  clf.fit(train_imgs, train_labels)
  preds = clf.predict(test_imgs)
  # Per-class precision / recall / F1 on the test split.
  print(classification_report(y_true=test_labels, y_pred=preds, target_names=classes))

              precision    recall  f1-score   support

    circular       0.71      0.66      0.68        41
  falciforme       0.93      0.64      0.76        42
      outras       0.58      0.79      0.67        43

    accuracy                           0.70       126
   macro avg       0.74      0.70      0.70       126
weighted avg       0.74      0.70      0.70       126

