In [1]:
# example of using the vgg16 model as a feature extraction model 
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.applications.vgg16 import decode_predictions
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.models import Model

from pickle import dump
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA
from sklearn.svm import OneClassSVM
from sklearn.metrics import confusion_matrix, precision_score, recall_score, accuracy_score, roc_curve, f1_score, classification_report

In [2]:
from glob import glob

In [3]:
def prepare_image(path):
    # load an image from file
    image = load_img(path, target_size=(224, 224))

    # convert the image pixels to a numpy array
    image = img_to_array(image)

    # reshape data for the model
    image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))

    # prepare the image for the VGG model
    image = preprocess_input(image)
    
    return image

In [5]:
# train good images
train_good = []
for p in glob('./bottle/train/good/*'):
    train_good.append(prepare_image(p))
    
# test small broken
test_small = []
for p in glob('./bottle/test/broken_small/*'):
    test_small.append(prepare_image(p))

# test large broken
test_large = []
for p in glob('./bottle/test/broken_large/*'):
    test_large.append(prepare_image(p))
    
# test contamination
test_con = []
for p in glob('./bottle/test/contamination/*'):
    test_con.append(prepare_image(p))
    
# test good 
test_good = []
for p in glob('./bottle/test/good/*'):
    test_good.append(prepare_image(p))

In [6]:
# load model
model = ResNet50(input_shape=(244, 244, 3), include_top=False, pooling='avg')

In [7]:
def extract_features(data):
    result = []
    for i in range(len(data)):
        features = model.predict(data[i])
        result.append(features[0])
    return pd.DataFrame(result)

In [8]:
# extract features using VGG16 model
df_train = extract_features(train_good)
df_test_small = extract_features(test_small)
df_test_large = extract_features(test_large)
df_test_con = extract_features(test_con)
df_test_good = extract_features(test_good)



In [9]:
# dimensional_reduction_using_PCA(train and reduced)
pca = PCA(n_components=100)
df_train_reduced = pca.fit_transform(df_train)

In [10]:
# reduced the dimension of test set as well using trained pca
df_test_small_reduced = pca.transform(df_test_small)
df_test_large_reduced = pca.transform(df_test_large)
df_test_con_reduced = pca.transform(df_test_con)
df_test_good_reduced = pca.transform(df_test_good)

In [11]:
df_train_inverse = pca.inverse_transform(df_train_reduced)
df_test_small_inverse = pca.inverse_transform(df_test_small_reduced)
df_test_large_inverse = pca.inverse_transform(df_test_large_reduced)
df_test_con_inverse = pca.inverse_transform(df_test_con_reduced)
df_test_good_inverse = pca.inverse_transform(df_test_good_reduced)

In [12]:
# define threshold value based on training loss
train_loss = np.sum((df_train - df_train_inverse) ** 2, axis=1)
threshold = np.max(train_loss)*5
threshold

11.165448908128868

In [13]:
# calculate loss of small broken
small_broken_loss = np.sum((df_test_small - df_test_small_inverse) ** 2, axis=1)
# prediction based on threshold
predict = [1 if val>threshold else 0 for val in small_broken_loss]
real = [1]*len(small_broken_loss)

In [14]:
# calculate loss of large broken
large_broken_loss = np.sum((df_test_large - df_test_large_inverse) ** 2, axis=1)
# prediction based on threshold
predict.extend([1 if val>threshold else 0 for val in large_broken_loss])
real.extend([1]*len(large_broken_loss))

In [15]:
# calculate loss of contamination
cont_loss = np.sum((df_test_con - df_test_con_inverse) ** 2, axis=1)
# prediction based on threshold
predict.extend([1 if val>threshold else 0 for val in cont_loss])
real.extend([1]*len(cont_loss))

In [16]:
# calculate loss of good test image
good_test_loss = np.sum((df_test_good - df_test_good_inverse) ** 2, axis=1)
# prediction based on threshold
predict.extend([1 if val>threshold else 0 for val in good_test_loss])
real.extend([0]*len(good_test_loss))

In [17]:
def print_metrics(real, predict):
    print('Accuracy: ', accuracy_score(real, predict))
    print('\nPrecision: ', precision_score(real, predict))
    print('\nrecall: ', recall_score(real, predict))
    print('\nf1_score: ', f1_score(real, predict))
    print('\nconfusion_matrix:\n ', pd.DataFrame(confusion_matrix(real, predict), index=[0, 1], columns=[0, 1]))
    print('\nclassification_report:\n ', classification_report(real, predict))

In [18]:
print_metrics(predict, real)

Accuracy:  0.9518072289156626

Precision:  0.9523809523809523

recall:  0.9836065573770492

f1_score:  0.9677419354838709

confusion_matrix:
      0   1
0  19   3
1   1  60

classification_report:
                precision    recall  f1-score   support

           0       0.95      0.86      0.90        22
           1       0.95      0.98      0.97        61

    accuracy                           0.95        83
   macro avg       0.95      0.92      0.94        83
weighted avg       0.95      0.95      0.95        83



# use OneClassSVM model to train normal data only

In [19]:
# define and train one class SVM
model = OneClassSVM(gamma='scale', kernel='rbf', nu=0.001)
model.fit(df_train_reduced)

OneClassSVM(nu=0.001)

In [20]:
# model prediction on test set
test_small_result = model.predict(df_test_small_reduced)
test_large_result = model.predict(df_test_large_reduced)
test_con_result = model.predict(df_test_con_reduced)
test_good_result = model.predict(df_test_good_reduced)

In [21]:
# make real and predict result
predict = test_small_result.tolist()+test_large_result.tolist()+test_con_result.tolist()+test_good_result.tolist()
real = [-1 for i in range(len(test_small))]+[-1 for i in range(len(test_large))]+[-1 for i in range(len(test_con))]+[1 for i in range(len(test_good))]

In [22]:
def print_metrics(real, predict):
    print('Accuracy: ', accuracy_score(real, predict))
    print('\nPrecision: ', precision_score(real, predict))
    print('\nrecall: ', recall_score(real, predict))
    print('\nf1_score: ', f1_score(real, predict))
    print('\nconfusion_matrix:\n ', pd.DataFrame(confusion_matrix(real, predict), index=[-1, 1], columns=[-1, 1]))
    print('\nclassification_report:\n ', classification_report(real, predict))

In [23]:
print_metrics(predict, real)

Accuracy:  0.891566265060241

Precision:  0.75

recall:  0.7894736842105263

f1_score:  0.7692307692307692

confusion_matrix:
      -1   1
-1  59   5
 1   4  15

classification_report:
                precision    recall  f1-score   support

          -1       0.94      0.92      0.93        64
           1       0.75      0.79      0.77        19

    accuracy                           0.89        83
   macro avg       0.84      0.86      0.85        83
weighted avg       0.89      0.89      0.89        83



# test on large features

In [24]:
model = OneClassSVM(gamma='scale', kernel='rbf', nu=0.01)
model.fit(df_train)

OneClassSVM(nu=0.01)

In [25]:
# model prediction on test set
test_small_result = model.predict(df_test_small)
test_large_result = model.predict(df_test_large)
test_con_result = model.predict(df_test_con)
test_good_result = model.predict(df_test_good)

In [26]:
predict = test_small_result.tolist()+test_large_result.tolist()+test_con_result.tolist()+test_good_result.tolist()
real = [-1 for i in range(len(test_small))]+[-1 for i in range(len(test_large))]+[-1 for i in range(len(test_con))]+[1 for i in range(len(test_good))]

In [27]:
print_metrics(predict, real)

Accuracy:  0.8072289156626506

Precision:  1.0

recall:  0.5555555555555556

f1_score:  0.7142857142857143

confusion_matrix:
      -1   1
-1  47   0
 1  16  20

classification_report:
                precision    recall  f1-score   support

          -1       0.75      1.00      0.85        47
           1       1.00      0.56      0.71        36

    accuracy                           0.81        83
   macro avg       0.87      0.78      0.78        83
weighted avg       0.86      0.81      0.79        83



In [30]:
# https://www.pyimagesearch.com/2019/06/24/change-input-shape-dimensions-for-fine-tuning-with-keras/

# https://www.kaggle.com/paperboiii/one-class-classification-for-images

# https://www.kaggle.com/trolukovich/food-5k-feature-extraction-with-resnet50-keras