In [None]:
import tensorflow as tf
import keras as K
from keras._tf_keras.keras.applications.resnet50 import ResNet50, preprocess_input
from keras._tf_keras.keras.layers import GlobalMaxPooling2D, MaxPooling2D
import dask, distributed

import cv2,os,pickle
import numpy as np
from tqdm.auto import tqdm
from dataclasses import dataclass, Field
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [3]:
model = ResNet50(include_top=False, weights='imagenet', input_shape=(224,224,3))
model.trainable = False # as the resnet model is already trained on another dataset

In [4]:
#model.summary()
model = K.Sequential([
    model, 
    GlobalMaxPooling2D() # using MaxPooling2D will return 3x3x2048, using GlobalMaxPooling2D would return a vector of 2048
])
model.summary()

In [None]:
image_path = 'dataset\41992.jpg'
img = cv2.imread(image_path)
img = cv2.resize(src=img,dsize=(224,224)) # reshape it to 224x224x3
# cv2.imshow("Frame",img)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

In [None]:
os.path.split(r'dataset\41992.jpg')

os.PathLike

In [6]:
# convert image to numpy array
img = np.asarray(img)

In [7]:
print('original shape:',img.shape)

original shape: (224, 224, 3)


In [8]:
expanded_img = np.expand_dims(img, axis=0)
print('expanded shape:',expanded_img.shape)
# 1-> refers to 1 image, which means to process batch we would need to preprocess the bacth

expanded shape: (1, 224, 224, 3)


In [9]:
preprocessed_img = preprocess_input(expanded_img)

In [10]:
result = model.predict(preprocessed_img)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


In [11]:
result.flatten().shape # flatten the output

(2048,)

In [14]:
# now normalize the result
normalized_result = (result.flatten()) / (np.linalg.norm(x = result.flatten()))

In [16]:
normalized_result

array([0.02591854, 0.05526206, 0.00061338, ..., 0.        , 0.01322317,
       0.01222626], dtype=float32)

In [None]:
@dataclass
class Feature_Extraction:
    '''
    functions: extract_feature(s.method), extract_feature_from_folder(method), save_(s.method)

    The extract_feature_from_folder method uses dask.delayed for parallel computation.
    '''
    model:K.Model
    folder:os.path.dirname

    @staticmethod
    def extract_feature(img_path:os.path, model:ResNet50)->np.array:
        # Load the image
        img = cv2.imread(img_path) 
        # Resize the image to 224 x 224 x 3; 3 being the channel
        img = cv2.resize(img,dsize=(224,224))
        # Expand the image
        img = np.expand_dims(a=img, axis=0)
        # Preprocess the image
        img = preprocess_input(img)
        # Predict the image
        img = model.predict(img, verbose='3')
        # Flatten the output from 1,2048 to 2048
        img = img.flatten()
        # Normalize the output
        img = img / np.linalg.norm(img)
        
        return img

    def extract_feature_from_folder(self):
        feature_list =[]
        # get all image_paths in the provided folder
        filenames = [ os.path.join(self.folder, filename) for filename in tqdm(os.listdir(self.folder),desc='Appending files') ]
        # extract features
        feature_list = [ dask.delayed(self.extract_feature) (file_path, self.model) for file_path in tqdm(filenames, desc= 'Generating and appending features') if file_path.endswith('.jpg')  ]
        # return tuple of feature_list, filenames
        features = dask.compute(*feature_list, scheduler='threads')
        return features, filenames
    
    @staticmethod
    def save_(model:K.Model, features_list:list, filenames:list)->None:
        try:
            # creates checkpoint if not exists
            if not os.path.exists('checkpoint'):
                os.makedirs('checkpoint')
            # save model
            model.save('checkpoint/model.keras')

            # pickle the features_list, filenames
            with open('checkpoint/featurevectors.pkl','wb') as file:
                pickle.dump(features_list, file=file, protocol=pickle.HIGHEST_PROTOCOL)

            with open('checkpoint/image_paths.pkl','wb') as file:
                pickle.dump(filenames, file=file, protocol=pickle.HIGHEST_PROTOCOL)
        except Exception:
            raise


In [110]:
fe = Feature_Extraction(model=model, folder='dataset')

In [104]:
feature_list, filenames = fe.extract_feature_from_folder()

Appending files: 100%|██████████| 2907/2907 [00:00<00:00, 356005.77it/s]
Generating and appending features: 100%|██████████| 2907/2907 [00:00<00:00, 5974.67it/s]


In [111]:
fe.save_(model=model, filenames=filenames, features_list=feature_list)