<a href="https://colab.research.google.com/github/z-arabi/SRU-deeplearning-workshop/blob/master/16_pretrained_convnet_feature_extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import keras
import cv2
import os
from keras.applications import VGG16
from keras.applications.vgg16 import preprocess_input

In [4]:
# Full VGG16 classifier (convolutional base + fully connected head) with
# ImageNet weights; `include_top=True` is the library default, spelled out
# here to contrast with the headless model built later in the notebook.
base_model = VGG16(include_top=True, weights='imagenet')
base_model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5
Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     14758

In [2]:
# `include_top` controls whether the final fully connected (FC) layers are
# kept. Dropping them removes most of the parameters, so the model also
# becomes much smaller.
#
# When the network is used purely as a feature extractor, the input shape
# does not have to be the default one; we can pick the shape we need.
#
# This notebook does feature extraction, which is different from transfer
# learning: in transfer learning the last layer(s) before the classification
# layer are trained as well, whereas here the pretrained weights are only
# used to compute features.

conv_base = VGG16(include_top=False,
                  weights='imagenet',
                  input_shape=(224, 224, 3))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [3]:
# Print the layer table of the headless (include_top=False) VGG16 base.
conv_base.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [None]:
# Root folder of the collected dataset; the capture cells below create one
# sub-folder per class inside it.
base_dir = os.path.expanduser("./my_dataset")
os.makedirs(base_dir, exist_ok=True)  # no error if the folder already exists
base_dir  # echo the path as the cell output

In [None]:
# Collect training images for one class from the default webcam.
#   SPACE -> save the current 224x224 frame as <base_dir>/<cls>/<n>.png
#   ENTER -> stop capturing
cls = "bottle"
c = 0  # number of frames saved in this session; also used as the file name
path = os.path.join(base_dir, cls)
os.makedirs(path, exist_ok=True)


# simple code for collecting data with Webcam
cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        raise RuntimeError("could not open the webcam (device 0)")
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame could be grabbed (camera unplugged / busy): `frame` is
            # None and cv2.resize would raise, so stop cleanly instead.
            print("failed to read a frame from the webcam, stopping")
            break
        frame = cv2.resize(frame, (224, 224))
        cv2.imshow('Webcam', frame)
        # Keep only the low byte: some platforms set extra high bits in the
        # value returned by waitKey.
        key = cv2.waitKey(1) & 0xFF
        if key == 13:  # 13 is the Enter key
            break
        elif key == 32:  # 32 is the Space key
            c += 1
            image_path = os.path.join(path, str(c)+".png")
            print("save to: ", image_path)
            cv2.imwrite(image_path, frame)
finally:
    # Always free the camera and close the preview window, even when the
    # loop is interrupted or raises.
    cap.release()
    cv2.destroyAllWindows()

In [None]:
# Collect training images for one class from the default webcam.
#   SPACE -> save the current 224x224 frame as <base_dir>/<cls>/<n>.png
#   ENTER -> stop capturing
cls = "mug"
c = 0  # number of frames saved in this session; also used as the file name
path = os.path.join(base_dir, cls)
os.makedirs(path, exist_ok=True)


# simple code for collecting data with Webcam
cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        raise RuntimeError("could not open the webcam (device 0)")
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame could be grabbed (camera unplugged / busy): `frame` is
            # None and cv2.resize would raise, so stop cleanly instead.
            print("failed to read a frame from the webcam, stopping")
            break
        frame = cv2.resize(frame, (224, 224))
        cv2.imshow('Webcam', frame)
        # Keep only the low byte: some platforms set extra high bits in the
        # value returned by waitKey.
        key = cv2.waitKey(1) & 0xFF
        if key == 13:  # 13 is the Enter key
            break
        elif key == 32:  # 32 is the Space key
            c += 1
            image_path = os.path.join(path, str(c)+".png")
            print("save to: ", image_path)
            cv2.imwrite(image_path, frame)
finally:
    # Always free the camera and close the preview window, even when the
    # loop is interrupted or raises.
    cap.release()
    cv2.destroyAllWindows()

In [None]:
# Count the images of all classes under the dataset folder.
# Only files with an image extension are counted, so stray files
# (.DS_Store, Thumbs.db, notebook checkpoints, ...) do not inflate `total`,
# which is later passed to extract_features as the number of samples.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp', '.ppm', '.tif', '.tiff')

total = 0
# Walk `base_dir` (the folder the capture cells write to) instead of
# repeating the path literal.
for root, dirs, files in os.walk(base_dir):
    total += sum(1 for name in files if name.lower().endswith(IMAGE_EXTENSIONS))
print("total files in a directory: ", total)

In [None]:
import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator


# Image generator that multiplies every pixel value by 1/255.
# NOTE(review): VGG16's own `preprocess_input` (imported in the first cell) is
# not used; the live-prediction cell also divides by 255, so both sides agree
# — confirm this is the intended preprocessing.
datagen = ImageDataGenerator(rescale=1./255)
batch_size = 25  # images per batch handed to conv_base.predict

def extract_features(directory, sample_count):
    """Run images from `directory` through `conv_base` and collect the results.

    Images are read with the module-level `datagen` in batches of
    `batch_size`, resized to 224x224, and labelled per sub-folder
    (`class_mode='binary'`).

    Args:
        directory: Folder containing one sub-folder per class.
        sample_count: Maximum number of images to process. It is capped at
            the number of images the generator actually found, so no image
            is processed twice and no row is left unfilled.

    Returns:
        A tuple `(features, labels, class_indices)` where `features` has
        shape `(n, 7, 7, 512)`, `labels` has shape `(n,)`, with
        `n = min(sample_count, images found)`, and `class_indices` is the
        generator's class-name -> label mapping.
    """
    generator = datagen.flow_from_directory(
        directory,
        target_size=(224, 224),
        batch_size=batch_size,
        class_mode='binary')

    # Never request more rows than there are images on disk: the generator
    # loops forever, so asking for more would only repeat images.
    target = min(sample_count, generator.samples)
    features = np.zeros(shape=(target, 7, 7, 512))
    labels = np.zeros(shape=(target))
    if target == 0:
        return features, labels, generator.class_indices

    filled = 0  # number of rows already written
    for inputs_batch, labels_batch in generator:
        features_batch = conv_base.predict(inputs_batch)
        # The last batch of an epoch can be shorter than `batch_size`, and the
        # remaining room can be shorter than the batch; write by offset and
        # truncate instead of assuming fixed-size slices.
        take = min(len(features_batch), target - filled)
        features[filled:filled + take] = features_batch[:take]
        labels[filled:filled + take] = labels_batch[:take]
        filled += take
        if filled >= target:
            # Generators yield data indefinitely, so stop explicitly once
            # every requested sample has been stored.
            break
    return features, labels, generator.class_indices

# Extract conv features for the collected images; `dictionary` receives the
# generator's class-name -> label mapping returned by extract_features.
train_features, train_labels, dictionary = extract_features(base_dir, total)

In [None]:
# Classical ML estimators expect a 2-D array of shape (samples, features), so
# each 7x7x512 feature map is flattened into one vector of 7*7*512 = 25088
# values.
train_features = np.reshape(train_features, (-1, 7 * 7 * 512))

# There is another way:
# we have 7x7 spatial positions and 512 channels, and each channel responds to
# one specific kind of thing (temperature, colour, a mouth, ...).
# So we can apply max pooling or average pooling over each 7x7 map and get a
# single number that summarises that feature; the result is a 1x512 vector per
# image, which is also a perfectly good input for the classifier.

In [None]:
# The pretrained features are expected to separate the classes well (the
# original note calls them linearly separable), so a simple classifier is
# enough: fit a 3-nearest-neighbours model on the flattened features.
from sklearn.neighbors import KNeighborsClassifier
neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(train_features, train_labels)


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=3, p=2,
           weights='uniform')

In [None]:
# Show the class-name -> label mapping returned by extract_features.
dictionary

In [None]:
def get_class_name(l):
    """Return the class name whose label in `dictionary` equals `l`.

    `dictionary` is the module-level class-name -> label mapping. The first
    matching name is returned; if no entry has that label the result is None.
    """
    matches = (name for name, label in dictionary.items() if label == l)
    return next(matches, None)

In [None]:
# Sanity check: look up the class name stored for label 0.
get_class_name(0)

In [None]:
import cv2
# Live demo: classify every webcam frame with the conv base + kNN and draw
# the predicted class name on the preview. ENTER stops the loop.
cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        raise RuntimeError("could not open the webcam (device 0)")
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame could be grabbed: `frame` is None and cv2.resize would
            # raise, so stop cleanly instead.
            print("failed to read a frame from the webcam, stopping")
            break
        frame = cv2.resize(frame, (224, 224))

        # OpenCV delivers frames in BGR order, while the training images were
        # loaded from disk by flow_from_directory (RGB by default). Convert so
        # the network sees the same channel order it saw during extraction.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        x = np.expand_dims(rgb, axis=0)  # add the batch dimension
        x = x/255.0  # same scaling as the training generator (rescale=1/255)
        # verbose=0 keeps Keras from printing a progress bar for every frame.
        feature = conv_base.predict(x, verbose=0)
        feature = np.reshape(feature, (-1, 7 * 7 * 512))

        predicted = neigh.predict(feature)

        text = get_class_name(predicted[0])

        # Draw on the original BGR frame, which is what imshow expects.
        cv2.putText(frame,  text, (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), lineType=cv2.LINE_AA)
        cv2.imshow('Webcam', frame)
        if cv2.waitKey(1) & 0xFF == 13:  # 13 is the Enter key
            break
finally:
    # Always free the camera and close the preview window, even when the
    # loop is interrupted or raises.
    cap.release()
    cv2.destroyAllWindows()

# Skip the middle Layers


In [7]:
# Echo the model object itself; the cell output is just its repr.
conv_base

<keras.src.engine.functional.Functional at 0x7d95b30ee320>

In [9]:
# Symbolic output tensor of the last layer of the conv base.
conv_base.layers[-1].output

<KerasTensor: shape=(None, 7, 7, 512) dtype=float32 (created by layer 'block5_pool')>

In [13]:
# Symbolic input tensor of the conv base.
conv_base.input

<KerasTensor: shape=(None, 224, 224, 3) dtype=float32 (created by layer 'input_1')>

In [15]:
from keras.models import Model
from keras.layers import Input

# Build a new functional model that maps the conv base's own input tensor to
# the output of its last layer.
out = conv_base.layers[-1].output

# A fresh 224x224x3 input placeholder; it is not wired into `my_model` in
# this cell.
image_input = Input(shape=(224, 224, 3))

my_model = Model(inputs=conv_base.input, outputs=out)
my_model.summary()

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0   