In [1]:
from pathlib import Path
import sys
root = Path.cwd().parent.parent
sys.path.insert(1, str(root))
%load_ext autoreload
%autoreload 2

In [2]:
from appconfig.config import PROCESSED_DATA_DIR

# Import needed library

In [3]:
from tensorflow.keras.applications import Xception
from tensorflow.keras.models import Model
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC

In [4]:
conv_base = Xception(
        weights='imagenet',
        include_top=True,
        input_shape=(299, 299, 3))

In [5]:
print(conv_base.input)
print(conv_base.output)

Tensor("input_1:0", shape=(None, 299, 299, 3), dtype=float32)
Tensor("predictions/Identity:0", shape=(None, 1000), dtype=float32)


In [6]:
model2 = Model(conv_base.input, conv_base.layers[-2].output)
print(model2.input)
print(model2.output)

Tensor("input_1:0", shape=(None, 299, 299, 3), dtype=float32)
Tensor("avg_pool/Identity:0", shape=(None, 2048), dtype=float32)


In [11]:
train_dir = str(Path.cwd().parent.parent / 'data' / 'processed' / 'train')
test_dir = str(Path.cwd().parent.parent / 'data' / 'processed' / 'test')
datagen = ImageDataGenerator(rescale=1./255)
batch_size = 20

def extract_features(directory, sample_count, label_count, pretrained_model):
    features = np.zeros(shape=(sample_count, 2048))
    labels = np.zeros(shape=(sample_count,label_count))
    
    generator = datagen.flow_from_directory(
            directory,
            target_size=(299, 299),
            batch_size=batch_size,
            class_mode='categorical')
    i = 0
    for inputs_batch, labels_batch in generator:
        features_batch = pretrained_model.predict(inputs_batch)
        features[i * batch_size : (i + 1) * batch_size] = features_batch
        labels[i * batch_size : (i + 1) * batch_size] = labels_batch
        i += 1
        if i * batch_size >= sample_count:
            break
    return features, labels

train_features, train_labels = extract_features(train_dir, 2400, 11, model2)
test_features, test_labels = extract_features(test_dir, 800, 11, model2)

# Standardization 

In [54]:
sc = StandardScaler()

train_features_std = sc.fit_transform(train_features)
test_features = sc.transform(test_features)

# PCA

In [77]:
pca128 = PCA(n_components = 128)
train_features_std_128 = pca128.fit_transform(train_features_std)
test_features_std_128 = pca128.transform(test_features_std)

# SVM

In [109]:
clf128 = SVC(kernel='linear', probability=True)
clf128.fit(train_features_std_128, train_labels)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [117]:
cross_val_score(clf, train_features_std_128, train_labels, cv=10, n_jobs=-1)

array([0.99583333, 0.99583333, 1.        , 0.99583333, 0.99583333,
       0.99166667, 1.        , 0.99583333, 0.99583333, 0.99583333])

In [118]:
clf.score(test_features_std_128, test_labels)

0.99875