In [1]:
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
import os
from IPython.core.display import display

# first lets look at the data
dataFolder = 'data_cleaned'
wordLists = os.listdir(dataFolder)

#populate the data frame with words
ASL_df = pd.DataFrame(wordLists,columns = ['Word']);
#populate with full paths
ASL_df['Full Path'] = ASL_df['Word'].apply(lambda x: os.path.join(dataFolder,x))
#populate with # samples
ASL_df['# samples'] = ASL_df['Full Path'].apply(lambda x: len(os.listdir(x)))

#sort them by largest or smallest
ASL_df = ASL_df.sort_values(by=['# samples'],ascending = False)

print('Words with the most # of examples')
display(ASL_df.head(10))
print(ASL_df.head(10)


Words with the most # of examples


Unnamed: 0,Word,Full Path,# samples
46,ART-DESIGN,data_cleaned\ART-DESIGN,32
185,CUTE,data_cleaned\CUTE,30
15,ADVISE-INFLUENCE,data_cleaned\ADVISE-INFLUENCE,24
261,PAST,data_cleaned\PAST,24
207,GUITAR,data_cleaned\GUITAR,24
205,GOVERNMENT,data_cleaned\GOVERNMENT,24
16,AFRAID,data_cleaned\AFRAID,24
95,BIG,data_cleaned\BIG,24
37,ANSWER,data_cleaned\ANSWER,22
18,AGAIN,data_cleaned\AGAIN,22


In [43]:
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
from keras.models import Sequential
from keras.layers import GlobalAveragePooling2D
import cv2

def getImagesFromVideoFile(videoName):
    #get video source object
    cap = cv2.VideoCapture(videoName)
    w = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    numFrames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    size = (h,w)
    vid_Frames = [] #initialize
    
    if(cap.isOpened()):
        while(True):
            #read the captured video
            ret, frame = cap.read()
            
            if(not ret):
                #no more frames so lets exit the loop
                break
            im = cv2.resize(frame, (224, 224), interpolation = cv2.INTER_AREA)
            #now lets just cast it and fix the channels
            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB).astype(np.float64)
            vid_Frames.append(im)
    else:
        print('Something wrong with VideoCapture')
    #release the object
    cap.release()
    return vid_Frames,size,numFrames

        
#lets load the model
FeatureExtractor = Sequential()
conv_base = VGG16(weights='imagenet', include_top=False)
FeatureExtractor.add(conv_base)
# FeatureExtractor.add(GlobalAveragePooling2D()) # to reduce dimension for the the RNN

            
FeatureExtractor.summary()




_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, None, None, 512)   14714688  
Total params: 14,714,688
Trainable params: 14,714,688
Non-trainable params: 0
_________________________________________________________________


In [52]:
import time 
NumClasses = 1
for index, row in ASL_df.head(NumClasses).iterrows():
    #get the full paths of the videos
    videoNameSources = [os.path.join(row['Full Path'],x) for x in os.listdir(row['Full Path'])]
    for i, videoName in enumerate(videoNameSources):
        vid_Frames,size,numFrames = getImagesFromVideoFile(videoName)
        #lets create the destination directory
        baseDirectory = 'data_features'
        if not os.path.exists(baseDirectory):
            os.makedirs(baseDirectory)
        destDirectory = os.path.join(baseDirectory,row['Word'] + '_'+str(i))
        #now lets extract the features in batch
        print('Processing {} frames'.format(numFrames))
        print('Starting Feature Extraction')
        start = time.time()
        x = np.array(vid_Frames) # create the mult-dim array
        x = preprocess_input(x) # process input to work with the VGG network
        y = FeatureExtractor.predict(x) # extract Features
        
        end = time.time()
        print('Total Time: {} mins \n'.format((end-start)/60))
        
        # now lets save the features to the destination directory 
        np.save(destDirectory, y)
        break


breaking out
Processing 176.0 frames
Starting Feature Extraction
Total Time: 2.985128446420034 mins 



In [46]:
print(y.shape)

(151, 7, 7, 512)
