Fontes: 

* https://www.kaggle.com/ozdemirh/flower-recognition-with-transfer-learning-and-svm

* https://www.kaggle.com/andyc97/image-classification-pca-svm-logit-and-cnn

In [1]:
from google.colab import drive

import os

import numpy as np
import pandas as pd

import seaborn as sea

import tensorflow as tf
from tensorflow.keras.applications.mobilenet import MobileNet
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.mobilenet import preprocess_input
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import l2

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import OneHotEncoder
from sklearn.utils import shuffle
from sklearn import svm

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

In [2]:
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [3]:
classes = ["circular", "falciforme", "outras"]
path_train = "/content/gdrive/My Drive/Colab Notebooks/dataset/dataset_erythrocytes/round_1/train/"
path_test = "/content/gdrive/My Drive/Colab Notebooks/dataset/dataset_erythrocytes/round_1/test/"

In [4]:
# load pretrained MobileNet
model = VGG16(input_shape=(224,224,3), include_top=True)
#model = MobileNet(input_shape=(224,224,3), include_top=True)
model.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

In [5]:
#vector = model.get_layer("reshape_2").output
vector = model.get_layer("fc2").output
feature_extractor = tf.keras.Model(model.input, vector)

In [6]:
# create empty feature and label lists
X_list_train = []
Y_list_train = []

for f in range(3):    
    folder_path = os.path.join(path_train, classes[f])
    for file in os.listdir(folder_path):    
        file_path = os.path.join(folder_path, file)
        
        # check file extension, skip file if not jpg
        if not(file.endswith(".jpg")):
            continue
        
        # load image
        img = image.load_img(file_path, target_size=(224,224))
        # convert image to numpy array
        img_arr = image.img_to_array(img)
        # add 1 more dimension
        img_arr_b = np.expand_dims(img_arr, axis=0)
        # preprocess image
        input_img = preprocess_input(img_arr_b)
        # extract feature
        feature_vec = feature_extractor.predict(input_img)
    
        X_list_train.append(feature_vec.ravel())
        Y_list_train.append(f)

In [7]:
# create empty feature and label lists
X_list_test = []
Y_list_test = []

for f in range(3):    
    folder_path = os.path.join(path_test, classes[f])
    for file in os.listdir(folder_path):    
        file_path = os.path.join(folder_path, file)
        
        # check file extension, skip file if not jpg
        if not(file.endswith(".jpg")):
            continue
        
        # load image
        img = image.load_img(file_path, target_size=(224,224))
        # convert image to numpy array
        img_arr = image.img_to_array(img)
        # add 1 more dimension
        img_arr_b = np.expand_dims(img_arr, axis=0)
        # preprocess image
        input_img = preprocess_input(img_arr_b)
        # extract feature
        feature_vec = feature_extractor.predict(input_img)
    
        X_list_test.append(feature_vec.ravel())
        Y_list_test.append(f)

In [10]:
train_X = np.asarray(X_list_train, dtype=np.float32)
train_Y = np.asarray(Y_list_train, dtype=np.float32)

test_X = np.asarray(X_list_test, dtype=np.float32)
test_Y = np.asarray(Y_list_test, dtype=np.float32)

print("Shape of train_X")
print(train_X.shape)
print("\nShape of test_X")
print(test_X.shape)


Shape of train_X
(563, 4096)

Shape of test_X
(63, 4096)


In [12]:
svm_lin = svm.SVC(C=1.0, kernel="linear")
svm_lin.fit(train_X, train_Y)
y_pred = svm_lin.predict(test_X)
print(classification_report(test_Y, y_pred,
                            target_names=classes))

              precision    recall  f1-score   support

    circular       0.86      0.86      0.86        21
  falciforme       0.86      0.86      0.86        21
      outras       0.71      0.71      0.71        21

    accuracy                           0.81        63
   macro avg       0.81      0.81      0.81        63
weighted avg       0.81      0.81      0.81        63



In [13]:
svm_nlin = svm.SVC(C=1.0, kernel="rbf")
svm_nlin.fit(train_X, train_Y)
y_pred = svm_nlin.predict(test_X)
print(classification_report(test_Y, y_pred,
                            target_names=classes))

              precision    recall  f1-score   support

    circular       0.62      0.95      0.75        21
  falciforme       0.79      0.52      0.63        21
      outras       0.71      0.57      0.63        21

    accuracy                           0.68        63
   macro avg       0.71      0.68      0.67        63
weighted avg       0.71      0.68      0.67        63



In [14]:
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score

In [15]:
# Dimensionality reduction - Full PCA
im_pca = PCA()
im_pca.fit(train_X)
variance_explained_list = im_pca.explained_variance_ratio_.cumsum()
print(variance_explained_list)

[0.4354939  0.5361573  0.62935805 0.6962702  0.7403634  0.77328235
 0.7955649  0.8167361  0.83203053 0.8445367  0.85635126 0.8663708
 0.87440336 0.8818907  0.8882518  0.89374137 0.8990591  0.90386015
 0.90818393 0.9123933  0.9162406  0.9197549  0.92282456 0.9256547
 0.9284103  0.93095624 0.9333535  0.93560725 0.9377124  0.9396902
 0.941574   0.9433923  0.9450085  0.94654894 0.9480273  0.94949615
 0.95093364 0.95235497 0.95369935 0.95497346 0.95618397 0.95735705
 0.95848405 0.95958334 0.9606511  0.9616905  0.96265006 0.9636003
 0.96451396 0.96538633 0.9662177  0.96703154 0.9678036  0.9685552
 0.9692606  0.9699525  0.9706347  0.97129935 0.9719471  0.9725676
 0.9731525  0.973724   0.97427964 0.9748281  0.975362   0.97587806
 0.97639    0.9768819  0.97734755 0.9778065  0.9782513  0.9786946
 0.97912407 0.97953546 0.9799329  0.980323   0.98069614 0.9810525
 0.98140645 0.98175585 0.9820946  0.9824267  0.9827563  0.98307663
 0.98338616 0.98368984 0.9839792  0.9842622  0.98454136 0.98481274
 0.

In [16]:
test_x_pca = im_pca.transform(test_X)
train_x_pca = im_pca.transform(train_X)

In [17]:
# Support vector machine with PCA
def svm_grid_search(C, kernel, train_X, train_Y):
    accuracy_score_list = []
    
    for c in C:
        # Model training
        svmClassifier = svm.SVC(C = c, kernel = kernel)
        svmClassifier.fit(train_X, train_Y.ravel())
        # Prediction on test set
        pred_y = svmClassifier.predict(train_X)
        # Accuracy
        accuracy = accuracy_score(train_Y, pred_y)
        accuracy_score_list.append(accuracy)
        print('Regularization parameters: ', c, 'Accuracy', accuracy)
    
    max_accurarcy_id = accuracy_score_list.index(max(accuracy_score_list))
    return C[max_accurarcy_id] 

C, kernel = [0.1 * i for i in range(1, 30)], 'rbf'
opt_C = svm_grid_search(C, kernel, train_x_pca, train_Y)

Regularization parameters:  0.1 Accuracy 0.7158081705150977
Regularization parameters:  0.2 Accuracy 0.7815275310834814
Regularization parameters:  0.30000000000000004 Accuracy 0.8117229129662522
Regularization parameters:  0.4 Accuracy 0.822380106571936
Regularization parameters:  0.5 Accuracy 0.8383658969804618
Regularization parameters:  0.6000000000000001 Accuracy 0.8525754884547069
Regularization parameters:  0.7000000000000001 Accuracy 0.8650088809946714
Regularization parameters:  0.8 Accuracy 0.8685612788632326
Regularization parameters:  0.9 Accuracy 0.8774422735346359
Regularization parameters:  1.0 Accuracy 0.8845470692717584
Regularization parameters:  1.1 Accuracy 0.8880994671403197
Regularization parameters:  1.2000000000000002 Accuracy 0.9005328596802842
Regularization parameters:  1.3 Accuracy 0.9058614564831261
Regularization parameters:  1.4000000000000001 Accuracy 0.9094138543516874
Regularization parameters:  1.5 Accuracy 0.9182948490230906
Regularization parameters

In [19]:
# Test set
svmClassifier = svm.SVC(C = opt_C, kernel = kernel)
svmClassifier.fit(train_x_pca, train_Y.ravel())
pred_y = svmClassifier.predict(test_x_pca)
accuracy = accuracy_score(test_Y, pred_y)
#print(accuracy)
print(classification_report(test_Y, pred_y,
                            target_names=classes))

              precision    recall  f1-score   support

    circular       0.90      0.86      0.88        21
  falciforme       0.87      0.95      0.91        21
      outras       0.80      0.76      0.78        21

    accuracy                           0.86        63
   macro avg       0.86      0.86      0.86        63
weighted avg       0.86      0.86      0.86        63

