In [3]:
# for loading/processing the images  
from keras.preprocessing.image import load_img 
from keras.preprocessing.image import img_to_array 
from keras.applications.vgg16 import preprocess_input 

# models 
from keras.applications.vgg16 import VGG16 
from keras.models import Model

# clustering and dimension reduction
# from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

# for everything else
import os
import numpy as np
import matplotlib.pyplot as plt
from random import randint
import pandas as pd
import pickle

In [5]:
# Path to the training image folder. An empty string means "the current
# working directory".
path = r""
# Resolve to an absolute path *before* changing directory; otherwise a relative
# `path` would be looked up a second time, from inside itself, by os.scandir().
image_dir = os.path.abspath(path)
# Change the working directory to the image folder so the bare filenames
# collected below can be opened directly.
os.chdir(image_dir)
# Collect the names of all PNG files in the folder. os.scandir() yields entries
# in arbitrary order, so sort them to keep the image order stable across runs.
with os.scandir(image_dir) as files:
    nutImg = sorted(
        file.name
        for file in files
        # regular files only, matching the extension case-insensitively
        if file.is_file() and file.name.lower().endswith('.png')
    )

In [7]:
# Sanity check of what was found: number of images and the first few names.
len(nutImg), nutImg[:5]

In [8]:
# Build the feature extractor once, up front, so it can be passed to
# extract_features() for every image: start from the stock VGG16 network and
# cut it off at its second-to-last layer, which drops the final output layer.
vgg16_full = VGG16()
penultimate_output = vgg16_full.layers[-2].output
model = Model(inputs=vgg16_full.inputs, outputs=penultimate_output)

def extract_features(file, model):
    """Run a single image through `model` and return the predicted features.

    Parameters
    ----------
    file
        Path of the image to load; passed straight to `load_img`.
    model
        Object exposing a Keras-style `predict(batch)` method.

    Returns
    -------
    The value returned by `model.predict` for a batch containing only this
    image. With the truncated VGG16 built in this notebook this is presumably
    an array of shape (1, 4096) -- the caller later reshapes to (-1, 4096).
    """
    # load the image resized to 224x224 (bicubic resampling)
    img = load_img(file, target_size=(224, 224), interpolation='bicubic')
    # convert from 'PIL.Image.Image' to a numpy array
    img = np.array(img)
    # add the leading batch axis: (224, 224, 3) -> (1, 224, 224, 3)
    batch = np.expand_dims(img, axis=0)
    # prepare image for model
    imgx = preprocess_input(batch)
    # Get the feature vector. `use_multiprocessing` is deliberately not passed:
    # it only applies to generator/Sequence inputs (not arrays) and newer Keras
    # releases reject it as an unknown argument.
    features = model.predict(imgx)
    return features

In [9]:
# Maps image filename -> feature array returned by extract_features().
data = {}
# Path of the pickle FILE used to checkpoint the features gathered so far
# whenever an image fails. Leave empty to skip checkpointing.
p = r""
# loop through each image in the dataset
for nut in nutImg:
    # try to extract the features and update the dictionary
    try:
        data[nut] = extract_features(nut, model)
    # Catch Exception rather than using a bare `except:` so that interrupting
    # the kernel (KeyboardInterrupt) still stops the loop.
    except Exception as err:
        # say which image was skipped instead of failing silently
        print(f"Skipping {nut}: {err!r}")
        # best-effort checkpoint of what has been extracted so far (optional)
        if p:
            with open(p, 'wb') as checkpoint:
                pickle.dump(data, checkpoint)

# get a list of the filenames
filenames = np.array(list(data.keys()))

# stack just the features and reshape so that there are n rows of 4096 values
feat = np.array(list(data.values())).reshape(-1, 4096)
feat.shape

(100, 4096)

In [11]:
# PCA to reduce the dimension of the feature vectors.
# PCA cannot extract more components than min(n_samples, n_features), so cap
# the requested 100 in case fewer images made it through feature extraction.
n_components = min(100, *feat.shape)
pca = PCA(n_components=n_components, random_state=22)
pca.fit(feat)
x = pca.transform(feat)