In [13]:
# example of using the vgg16 model as a feature extraction model 
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.applications.vgg16 import decode_predictions
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.models import Model

from pickle import dump
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA
from sklearn.svm import OneClassSVM
from sklearn.metrics import confusion_matrix, precision_score, recall_score, accuracy_score, roc_curve, f1_score, classification_report

In [2]:
from glob import glob

In [3]:
def prepare_image(path):
    # load an image from file
    image = load_img(path, target_size=(224, 224))

    # convert the image pixels to a numpy array
    image = img_to_array(image)

    # reshape data for the model
    image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))

    # prepare the image for the VGG model
    image = preprocess_input(image)
    
    return image

In [4]:
# train good images
train_good = []
for p in glob('./bottle/train/good/*'):
    train_good.append(prepare_image(p))
    
# test small broken
test_small = []
for p in glob('./bottle/test/broken_small/*'):
    test_small.append(prepare_image(p))

# test large broken
test_large = []
for p in glob('./bottle/test/broken_large/*'):
    test_large.append(prepare_image(p))
    
# test contamination
test_con = []
for p in glob('./bottle/test/contamination/*'):
    test_con.append(prepare_image(p))
    
# test good 
test_good = []
for p in glob('./bottle/test/good/*'):
    test_good.append(prepare_image(p))

In [5]:
# load model
model = VGG16()
                 
# remove the output layer
model = Model(inputs=model.inputs, outputs=model.layers[-2].output)

In [6]:
def extract_features(data):
    result = []
    for i in range(len(data)):
        features = model.predict(data[i])
        result.append(features[0])
    return pd.DataFrame(result)

In [7]:
# extract features using VGG16 model
df_train = extract_features(train_good)
df_test_small = extract_features(test_small)
df_test_large = extract_features(test_large)
df_test_con = extract_features(test_con)
df_test_good = extract_features(test_good)

In [8]:
# dimensional_reduction_using_PCA(train and reduced)
pca = PCA(n_components=100)
df_train_reduced = pca.fit_transform(df_train)

In [9]:
# reduced the dimension of test set as well using trained pca
df_test_small_reduced = pca.transform(df_test_small)
df_test_large_reduced = pca.transform(df_test_large)
df_test_con_reduced = pca.transform(df_test_con)
df_test_good_reduced = pca.transform(df_test_good)

In [11]:
df_train_inverse = pca.inverse_transform(df_train_reduced)
df_test_small_inverse = pca.inverse_transform(df_test_small_reduced)
df_test_large_inverse = pca.inverse_transform(df_test_large_reduced)
df_test_con_inverse = pca.inverse_transform(df_test_con_reduced)
df_test_good_inverse = pca.inverse_transform(df_test_good_reduced)

In [35]:
# define threshold value based on training loss
train_loss = np.sum((df_train - df_train_inverse) ** 2, axis=1)
threshold = np.max(train_loss)*5
threshold

11.278414532781598

In [36]:
# calculate loss of small broken
small_broken_loss = np.sum((df_test_small - df_test_small_inverse) ** 2, axis=1)
# prediction based on threshold
predict = [1 if val>threshold else 0 for val in small_broken_loss]
real = [1]*len(small_broken_loss)

In [37]:
# calculate loss of large broken
large_broken_loss = np.sum((df_test_large - df_test_large_inverse) ** 2, axis=1)
# prediction based on threshold
predict.extend([1 if val>threshold else 0 for val in large_broken_loss])
real.extend([1]*len(large_broken_loss))

In [38]:
# calculate loss of contamination
cont_loss = np.sum((df_test_con - df_test_con_inverse) ** 2, axis=1)
# prediction based on threshold
predict.extend([1 if val>threshold else 0 for val in cont_loss])
real.extend([1]*len(cont_loss))

In [39]:
# calculate loss of good test image
good_test_loss = np.sum((df_test_good - df_test_good_inverse) ** 2, axis=1)
# prediction based on threshold
predict.extend([1 if val>threshold else 0 for val in good_test_loss])
real.extend([0]*len(good_test_loss))

In [42]:
def print_metrics(real, predict):
    print('Accuracy: ', accuracy_score(real, predict))
    print('\nPrecision: ', precision_score(real, predict))
    print('\nrecall: ', recall_score(real, predict))
    print('\nf1_score: ', f1_score(real, predict))
    print('\nconfusion_matrix:\n ', pd.DataFrame(confusion_matrix(real, predict), index=[0, 1], columns=[0, 1]))
    print('\nclassification_report:\n ', classification_report(real, predict))

In [43]:
print_metrics(predict, real)

Accuracy:  0.9759036144578314

Precision:  1.0

recall:  0.9692307692307692

f1_score:  0.9843749999999999

confusion_matrix:
      0   1
0  18   0
1   2  63

classification_report:
                precision    recall  f1-score   support

           0       0.90      1.00      0.95        18
           1       1.00      0.97      0.98        65

    accuracy                           0.98        83
   macro avg       0.95      0.98      0.97        83
weighted avg       0.98      0.98      0.98        83



# use OneClassSVM model to train normal data only

In [57]:
# define and train one class SVM
model = OneClassSVM(gamma='scale', kernel='rbf', nu=0.001)
model.fit(df_train_reduced)

OneClassSVM(nu=0.001)

In [58]:
# model prediction on test set
test_small_result = model.predict(df_test_small_reduced)
test_large_result = model.predict(df_test_large_reduced)
test_con_result = model.predict(df_test_con_reduced)
test_good_result = model.predict(df_test_good_reduced)

In [59]:
# make real and predict result
predict = test_small_result.tolist()+test_large_result.tolist()+test_con_result.tolist()+test_good_result.tolist()
real = [-1 for i in range(len(test_small))]+[-1 for i in range(len(test_large))]+[-1 for i in range(len(test_con))]+[1 for i in range(len(test_good))]

In [60]:
def print_metrics(real, predict):
    print('Accuracy: ', accuracy_score(real, predict))
    print('\nPrecision: ', precision_score(real, predict))
    print('\nrecall: ', recall_score(real, predict))
    print('\nf1_score: ', f1_score(real, predict))
    print('\nconfusion_matrix:\n ', pd.DataFrame(confusion_matrix(real, predict), index=[-1, 1], columns=[-1, 1]))
    print('\nclassification_report:\n ', classification_report(real, predict))

In [61]:
print_metrics(predict, real)

Accuracy:  0.927710843373494

Precision:  0.8

recall:  0.8888888888888888

f1_score:  0.8421052631578948

confusion_matrix:
      -1   1
-1  61   4
 1   2  16

classification_report:
                precision    recall  f1-score   support

          -1       0.97      0.94      0.95        65
           1       0.80      0.89      0.84        18

    accuracy                           0.93        83
   macro avg       0.88      0.91      0.90        83
weighted avg       0.93      0.93      0.93        83



# test on large features

In [67]:
model = OneClassSVM(gamma='scale', kernel='rbf', nu=0.01)
model.fit(df_train)

OneClassSVM(nu=0.01)

In [68]:
# model prediction on test set
test_small_result = model.predict(df_test_small)
test_large_result = model.predict(df_test_large)
test_con_result = model.predict(df_test_con)
test_good_result = model.predict(df_test_good)

In [69]:
predict = test_small_result.tolist()+test_large_result.tolist()+test_con_result.tolist()+test_good_result.tolist()
real = [-1 for i in range(len(test_small))]+[-1 for i in range(len(test_large))]+[-1 for i in range(len(test_con))]+[1 for i in range(len(test_good))]

In [70]:
print_metrics(predict, real)

Accuracy:  0.8674698795180723

Precision:  0.95

recall:  0.6551724137931034

f1_score:  0.7755102040816326

confusion_matrix:
      -1   1
-1  53   1
 1  10  19

classification_report:
                precision    recall  f1-score   support

          -1       0.84      0.98      0.91        54
           1       0.95      0.66      0.78        29

    accuracy                           0.87        83
   macro avg       0.90      0.82      0.84        83
weighted avg       0.88      0.87      0.86        83



In [30]:
# https://www.pyimagesearch.com/2019/06/24/change-input-shape-dimensions-for-fine-tuning-with-keras/

# https://www.kaggle.com/paperboiii/one-class-classification-for-images

# https://www.kaggle.com/trolukovich/food-5k-feature-extraction-with-resnet50-keras