In [None]:
# for loading/processing the images  
from keras.preprocessing.image import load_img 
from keras.preprocessing.image import img_to_array 
from keras.applications.vgg16 import preprocess_input 

# models 
from keras.applications.vgg16 import VGG16 
from keras.models import Model

# clustering and dimension reduction
# from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

# for everything else
import os
import numpy as np
import matplotlib.pyplot as plt
from random import randint
import pandas as pd
import pickle

In [None]:
# Make Google Drive reachable from this Colab runtime under /content/drive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [None]:

# Build the feature extractor once, up front, so it can be handed to
# extract_features() as an argument instead of being rebuilt per image.
# The full VGG16 network is instantiated first, then re-wired so that the
# output is taken from the second-to-last layer, i.e. the final output
# layer is dropped.
_vgg16_full = VGG16()
model = Model(
    inputs=_vgg16_full.inputs,
    outputs=_vgg16_full.layers[-2].output,
)

def extract_features(file, model):
    """Run a single image through ``model`` and return the model's output.

    Parameters
    ----------
    file
        Path of the image to load; passed unchanged to ``load_img``.
    model
        Object exposing ``predict``. In this notebook it is the VGG16
        network with its final output layer removed.

    Returns
    -------
    The value returned by ``model.predict`` for a batch holding just this
    image. With the truncated VGG16 built in this notebook that is expected
    to be a ``(1, 4096)`` array (the callers reshape to ``(-1, 4096)``).

    Raises
    ------
    Any exception raised by ``load_img`` (e.g. unreadable file) or by
    ``model.predict`` is propagated to the caller.
    """
    # load the image resized to 224x224 using bicubic interpolation
    img = load_img(file, target_size=(224, 224), interpolation='bicubic')
    # convert from 'PIL.Image.Image' to a numpy array
    img = np.array(img)
    # add the leading batch axis: (num_of_samples, height, width, channels)
    reshaped_img = img.reshape(1, 224, 224, 3)
    # apply the VGG16 input preprocessing
    imgx = preprocess_input(reshaped_img)
    # `use_multiprocessing` is deliberately not passed: it only applies to
    # generator / Sequence inputs, never to an in-memory array like this one,
    # and Keras 3's Model.predict rejects the keyword with a TypeError.
    features = model.predict(imgx)
    return features

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5


In [None]:
from google.colab import drive
drive.mount('/content/drive')

# Check the root folder
!ls /content/drive/

# Path should be visually same as in drive.google.com 
!ls /content/drive/MyDrive/CS1/trainingimage

path = "" #path to training image
# change the working directory to the path where the images are located
os.chdir(path)
# this list holds all the image filename
nutImg = []
# creates a ScandirIterator aliased as files
with os.scandir(path) as files:
  # loops through each file in the directory
    for file in files:
        if file.name.endswith('.png'):
            # adds only the image files to the fashion list
            nutImg.append(file.name)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
MyDrive
100.png  19.png  28.png  37.png  46.png  55.png  64.png  73.png  82.png  91.png
10.png	 1.png	 29.png  38.png  47.png  56.png  65.png  74.png  83.png  92.png
11.png	 20.png  2.png	 39.png  48.png  57.png  66.png  75.png  84.png  93.png
12.png	 21.png  30.png  3.png	 49.png  58.png  67.png  76.png  85.png  94.png
13.png	 22.png  31.png  40.png  4.png	 59.png  68.png  77.png  86.png  95.png
14.png	 23.png  32.png  41.png  50.png  5.png	 69.png  78.png  87.png  96.png
15.png	 24.png  33.png  42.png  51.png  60.png  6.png	 79.png  88.png  97.png
16.png	 25.png  34.png  43.png  52.png  61.png  70.png  7.png	 89.png  98.png
17.png	 26.png  35.png  44.png  53.png  62.png  71.png  80.png  8.png	 99.png
18.png	 27.png  36.png  45.png  54.png  63.png  72.png  81.png  90.png  9.png


In [None]:

# Extract one feature vector per training image, keyed by file name.
data = {}

# Where the features gathered so far are dumped if an image fails.
# Relative path, so it is written into the current working directory.
checkpoint_path = "feat_train_partial.pkl"

# loop through each image in the dataset
for nut in nutImg:
    try:
        feat = extract_features(nut, model)
        data[nut] = feat
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt still stops the loop. On failure, report the file,
    # checkpoint what has been extracted so far, and carry on.
    except Exception as err:
        print(f"feature extraction failed for {nut}: {err!r}")
        with open(checkpoint_path, 'wb') as file:
            pickle.dump(data, file)

# file names of the images that were processed successfully
filenames = np.array(list(data.keys()))

# stack the per-image outputs and flatten to one 4096-vector per image
feat_train = np.array(list(data.values()))
feat_train = feat_train.reshape(-1, 4096)



In [None]:
from google.colab import drive
drive.mount('/content/drive')


path = "" # path to the normal testing images (fill in before running)
# Change the working directory to the image folder: the feature-extraction
# loop opens every image by its bare file name.
os.chdir(path)
# List the current directory instead of `path` again. After the chdir a
# relative `path` would be resolved a second time (path/path) and fail;
# for an absolute `path` both forms list the same folder.
with os.scandir() as files:
    # keep only the names of the .png image files
    nutImg_test_normal = [
        entry.name for entry in files if entry.name.endswith('.png')
    ]
# Extract one feature vector per normal test image, keyed by file name.
data = {}

# Where the features gathered so far are dumped if an image fails.
# Relative path, so it is written into the current working directory.
checkpoint_path = "feat_test_normal_partial.pkl"

# loop through each image in the dataset
for nut in nutImg_test_normal:
    try:
        feat = extract_features(nut, model)
        data[nut] = feat
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt still stops the loop. On failure, report the file,
    # checkpoint what has been extracted so far, and carry on.
    except Exception as err:
        print(f"feature extraction failed for {nut}: {err!r}")
        with open(checkpoint_path, 'wb') as file:
            pickle.dump(data, file)

# file names of the images that were processed successfully
filenames = np.array(list(data.keys()))

# stack the per-image outputs and flatten to one 4096-vector per image
feat_test_normal = np.array(list(data.values()))
feat_test_normal = feat_test_normal.reshape(-1, 4096)
feat_test_normal.shape


In [None]:
from google.colab import drive
drive.mount('/content/drive')



path = "" # path to the anomaly testing images (fill in before running)
# Change the working directory to the image folder: the feature-extraction
# loop opens every image by its bare file name.
os.chdir(path)
# List the current directory instead of `path` again. After the chdir a
# relative `path` would be resolved a second time (path/path) and fail;
# for an absolute `path` both forms list the same folder.
with os.scandir() as files:
    # keep only the names of the .png image files
    nutImg_test_anomal = [
        entry.name for entry in files if entry.name.endswith('.png')
    ]
# Extract one feature vector per anomalous test image, keyed by file name.
data = {}

# Where the features gathered so far are dumped if an image fails.
# Relative path, so it is written into the current working directory.
checkpoint_path = "feat_test_anomal_partial.pkl"

# loop through each image in the dataset
for nut in nutImg_test_anomal:
    try:
        feat = extract_features(nut, model)
        data[nut] = feat
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt still stops the loop. On failure, report the file,
    # checkpoint what has been extracted so far, and carry on.
    except Exception as err:
        print(f"feature extraction failed for {nut}: {err!r}")
        with open(checkpoint_path, 'wb') as file:
            pickle.dump(data, file)

# file names of the images that were processed successfully
filenames = np.array(list(data.keys()))

# stack the per-image outputs and flatten to one 4096-vector per image
feat_test_anomal = np.array(list(data.values()))
feat_test_anomal = feat_test_anomal.reshape(-1, 4096)
feat_test_anomal.shape


In [None]:
# Isolation Forest: fit the anomaly detector on the training features.
from sklearn.ensemble import IsolationForest

X_train = feat_train
# NOTE: this rebinds `model`, which until now held the VGG16 feature
# extractor. Re-running a feature-extraction cell after this one would pass
# the IsolationForest to extract_features() instead.
model = IsolationForest(contamination=.04, random_state=0)
model.fit(X_train)


In [None]:
from sklearn import metrics

# Stack the normal and anomalous test features into a single test set,
# normal rows first.
X_test = np.concatenate((feat_test_normal, feat_test_anomal), axis = 0)

# Ground-truth labels in IsolationForest's convention:
# +1 for normal rows, -1 for anomalous rows.
y_test_normal = np.ones(feat_test_normal.shape[0]).reshape(feat_test_normal.shape[0], 1)
y_test_anomal = -1 * np.ones(feat_test_anomal.shape[0]).reshape(feat_test_anomal.shape[0], 1)

y_test = np.concatenate((y_test_normal, y_test_anomal), axis=0)

# `model` is the fitted IsolationForest here; predict() returns +1 for
# inliers and -1 for outliers.
preds_forest = model.predict(X_test)
targs_forest = y_test

# Flip the sign so that "anomaly" is the positive class (+1), which is what
# precision / recall / f1 use as pos_label by default.
preds_forest = preds_forest*-1
targs_forest = targs_forest*-1

# Continuous anomaly score for the ROC curve. decision_function() is lower
# for more abnormal samples, so negate it to make higher = more anomalous.
# Feeding the hard +/-1 predictions to roc_auc_score would reduce the curve
# to a single operating point.
scores_forest = -model.decision_function(X_test)

print("accuracy: ", metrics.accuracy_score(targs_forest, preds_forest))
print("precision: ", metrics.precision_score(targs_forest, preds_forest))
print("recall: ", metrics.recall_score(targs_forest, preds_forest))
print("f1: ", metrics.f1_score(targs_forest, preds_forest))
print("area under curve (auc): ", metrics.roc_auc_score(targs_forest, scores_forest))

accuracy:  0.9583333333333334
precision:  0.92
recall:  0.9583333333333334
f1:  0.9387755102040817
area under curve (auc):  0.9583333333333335
