In [3]:
import os
import numpy as np
import pandas as pd
from collections import OrderedDict

from sklearn.datasets import load_files
from sklearn.model_selection import train_test_split

#Importing Keras Libraries
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv3D, MaxPooling3D, GlobalAveragePooling3D
from keras.layers.core import Dense, Dropout
from keras.callbacks import ModelCheckpoint
from keras.preprocessing import image

#Import Video Processing Libraries
from skvideo.io import FFmpegReader, ffprobe
from skvideo.utils import rgb2gray
from PIL import Image
from tqdm import tqdm_notebook as tqdm

import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [2]:
pip install scikit-video

Collecting scikit-video
[?25l  Downloading https://files.pythonhosted.org/packages/b1/a6/c69cad508139a342810ae46e946ebb3256aa6e42f690d901bb68f50582e3/scikit_video-1.1.11-py2.py3-none-any.whl (2.3MB)
[K     |████████████████████████████████| 2.3MB 2.8MB/s 
Installing collected packages: scikit-video
Successfully installed scikit-video-1.1.11


In [0]:
# Image Functions
# 1. Get Total Frame Count in a Video
def getFrameCount(filepath):
    """Return the number of frames reported for the video at `filepath`.

    Args:
        filepath: Path of the video file, passed straight to `FFmpegReader`.

    Returns:
        The first element of the reader's `getShape()` result (the frame count).
    """
    cap = FFmpegReader(filename=filepath)
    try:
        return cap.getShape()[0]
    finally:
        # Always release the reader, even if querying the shape fails.
        cap.close()

In [15]:
# Index the dataset directory without shuffling, then pull out the
# file paths and their integer class labels.
raw_data = load_files(r'C/conFusion_3', shuffle=False)
files, targets = (raw_data[key] for key in ('filenames', 'target'))

FileNotFoundError: ignored

In [0]:
# Frame count of every video, in the same order as `files`.
frms = [getFrameCount(video_path) for video_path in files]

# Row 0: distinct frame counts; row 1: how many videos have that count.
unique_elements, counts_elements = np.unique(frms, return_counts=True)
frequency_table = np.asarray((unique_elements, counts_elements))
print("Frequency of unique values of the said array:")
print(frequency_table)

Frequency of unique values of the said array:
[[ 22  23  24  30  31  33  34  36  37  38  39  40  41  42  43  44  45  46
   47  48  49  50  51  52  53  54  55  57  59  61  63  64  65  66  67  75
   77  84 108 109]
 [  2   3   1   2   2   2   2   3   4   5   4   3   7   4   6   3  11   4
   11  10   8   3   3   4   2   1   3   1   1   3   1   2   1   1   1   2
    1   1   1   1]]


In [0]:
# One record per video that has at least 40 frames; shorter clips are skipped.
files_list = []
for video_path, label in zip(files, targets):
    frame_total = getFrameCount(video_path)
    if frame_total < 40:
        continue
    files_list.append(OrderedDict([
        ("FilePath", video_path),
        ("ActionType", label),
        ("FrameCount", frame_total),
    ]))

In [0]:
# Tabulate the retained videos: one row per record in `files_list`
# (columns FilePath, ActionType, FrameCount), then preview the first rows.
dataset = pd.DataFrame(files_list)
dataset.head()

Unnamed: 0,FilePath,ActionType,FrameCount
0,D:/AIML/DATASET/CapStone/MP4_1\hit\50_FIRST_DA...,0,44
1,D:/AIML/DATASET/CapStone/MP4_1\hit\50_FIRST_DA...,0,48
2,D:/AIML/DATASET/CapStone/MP4_1\hit\AmericanGan...,0,50
3,D:/AIML/DATASET/CapStone/MP4_1\hit\Collins_get...,0,41
4,D:/AIML/DATASET/CapStone/MP4_1\hit\Collins_get...,0,43


In [0]:
# Columns of the filtered table as plain Python lists.
# (To bypass the frame-count filter, use `files` and `targets` directly instead.)
filepaths = dataset["FilePath"].tolist()
actiontypes = dataset["ActionType"].tolist()

In [0]:
# Upper bound on frames kept per video; getVideo crops longer clips to this length.
MAX_FRAMES = 20
# Number of frames kept out of every fps-sized window of the video.
REQ_FPS = 10
# Positions within each fps-sized window that are kept (index % fps must fall in this range).
IS_VALID = range(REQ_FPS)
# Number of distinct labels among the retained videos; used as the one-hot width and output layer size.
No_OF_CLASS = len(np.unique(actiontypes))

In [0]:
def getVideo(filepath):
    """Read one video and return its sampled, resized, grayscale frames.

    Frames are sampled by position within each fps-sized window: a frame is
    kept when `index % fps` falls in `IS_VALID`. Each kept frame is resized
    to 128x128, the stack is converted with `rgb2gray`, and clips with at
    least `MAX_FRAMES` sampled frames are cropped to a window of exactly
    `MAX_FRAMES` frames taken from around the middle.

    Args:
        filepath: Path of the video file, passed straight to `FFmpegReader`.

    Returns:
        The frame stack with a leading axis of length 1 added. If fewer than
        `MAX_FRAMES` frames were sampled, all of them are returned, so the
        frame axis may be shorter than `MAX_FRAMES`.

    Raises:
        ValueError: If no frame could be sampled from the video.
    """
    cap = FFmpegReader(filename=filepath)
    list_of_frames = []
    try:
        # A reported frame rate below 1 would truncate to 0 and make the
        # modulo below raise ZeroDivisionError; treat it as 1 instead.
        fps = max(int(cap.inputfps), 1)

        for index, frame in enumerate(cap.nextFrame()):
            if (index % fps) in IS_VALID:
                temp_image = image.array_to_img(frame)
                # Image.LANCZOS is the same filter as the ANTIALIAS alias,
                # which newer Pillow releases no longer provide.
                resized = temp_image.resize((128, 128), Image.LANCZOS)
                list_of_frames.append(image.img_to_array(resized).astype('uint8'))
    finally:
        # Release the reader even when decoding fails part-way.
        cap.close()

    if not list_of_frames:
        raise ValueError('No frames could be read from video: {}'.format(filepath))

    temp_video = rgb2gray(np.stack(list_of_frames))

    total_frames = temp_video.shape[0]
    if MAX_FRAMES <= total_frames:
        # Start just past the midpoint of the surplus frames; when the surplus
        # is 0 or 1 frame the window starts at the very first frame.
        front = ((total_frames - MAX_FRAMES) // 2) + 1
        if front == 1:
            front = 0
        temp_video = temp_video[front:(front + MAX_FRAMES)]
    return np.expand_dims(temp_video, axis=0)

def getVideoTensor(path, normalize_pixels):
    """Load several videos and rescale each one into a target value range.

    Every path is read with `getVideo`, the results are stacked along the
    first axis, and each video is min-max scaled independently (extrema are
    taken over axes 1-3) into `[normalize_pixels[0], normalize_pixels[1]]`.

    Args:
        path: Iterable of video file paths.
        normalize_pixels: Pair `(low, high)` giving the target value range.

    Returns:
        The stacked, rescaled array with one entry per video along axis 0.
        A video whose values are all identical maps to `low` everywhere
        instead of producing NaN from a 0/0 division.
    """
    list_of_videos = [getVideo(p) for p in tqdm(path)]
    tensor = np.vstack(list_of_videos)
    base = normalize_pixels[0]
    r = normalize_pixels[1] - base
    min_ = np.min(tensor, axis=(1, 2, 3), keepdims=True)
    max_ = np.max(tensor, axis=(1, 2, 3), keepdims=True)

    # Constant-valued videos have max == min; divide by 1 there so the
    # result is `base` rather than NaN.
    span = max_ - min_
    span = np.where(span == 0, 1, span)

    return ((tensor.astype('float32') - min_) / span) * r + base

In [0]:
# Hold out one third of the videos as the test set; the fixed random_state
# makes the shuffle (and therefore the split) repeatable.
train_files, test_files, train_targets, test_targets = train_test_split(filepaths, actiontypes, test_size=1/3, random_state=100)

In [0]:
# Report how many videos landed in each split.
split_sizes = [
    ('Total number of videos:', filepaths),
    ('\nNumber of videos in training data:', train_files),
    ('Number of videos in test data:', test_files),
]
for caption, subset in split_sizes:
    print(caption, len(subset))

Total number of videos: 342

Number of videos in training data: 228
Number of videos in test data: 114


In [0]:
# Size of the training split before a validation set is carved out of it.
len_train = len(train_files)
# Index at which the validation slice starts: everything before it stays
# training data, the last ~25% (rounded down) becomes validation data.
val_len = len_train - int(len_train*0.25)

In [0]:
# Hold out the tail of the training split (~25%) for validation; the head
# stays as training data. Each right-hand side is evaluated in full before
# assignment, so both slices are taken from the unsplit lists.
train_files, valid_files = train_files[:val_len], train_files[val_len:]
train_targets, valid_targets = train_targets[:val_len], train_targets[val_len:]

In [0]:
# Report the final size of the training, validation and test splits.
final_split_sizes = [
    ('Number of videos in training data:', train_files),
    ('Number of videos in validation data:', valid_files),
    ('Number of videos in test data:', test_files),
]
for caption, subset in final_split_sizes:
    print(caption, len(subset))

Number of videos in training data: 171
Number of videos in validation data: 57
Number of videos in test data: 114


In [0]:
# Reading training videos (scaled to [-1, 1]) and one-hot encoding the training labels
X_train = getVideoTensor(train_files, (-1,1))
y_train = to_categorical(train_targets, num_classes=No_OF_CLASS)
for caption, array in (('Shape of training data:', X_train),
                       ('Shape of training labels:', y_train)):
    print(caption, array.shape)

HBox(children=(IntProgress(value=0, max=171), HTML(value='')))


Shape of training data: (171, 20, 128, 128, 1)
Shape of training labels: (171, 4)


In [0]:
# Validation set: videos scaled to [-1, 1], labels one-hot encoded
X_valid = getVideoTensor(valid_files, (-1,1))
y_valid = to_categorical(valid_targets, num_classes=No_OF_CLASS)
for caption, array in (('Shape of validation data:', X_valid),
                       ('Shape of validation labels:', y_valid)):
    print(caption, array.shape)

HBox(children=(IntProgress(value=0, max=57), HTML(value='')))


Shape of validation data: (57, 20, 128, 128, 1)
Shape of validation labels: (57, 4)


In [0]:
# Test set: videos scaled to [-1, 1], labels one-hot encoded
X_test = getVideoTensor(test_files, (-1,1))
y_test = to_categorical(test_targets, num_classes=No_OF_CLASS)
for caption, array in (('Shape of testing data:', X_test),
                       ('Shape of testing labels:', y_test)):
    print(caption, array.shape)

HBox(children=(IntProgress(value=0, max=114), HTML(value='')))


Shape of testing data: (114, 20, 128, 128, 1)
Shape of testing labels: (114, 4)


In [0]:
# Sequential 3D-CNN built from a single layer list: four convolution + max-pooling
# stages, global average pooling, one hidden dense layer with dropout, softmax output.
model = Sequential([
    # Stage 1: the only 'same'-padded convolution; it also declares the input shape
    Conv3D(filters=16, kernel_size=(5, 3, 3), strides=(1, 1, 1), padding='same', activation='relu',
           input_shape=X_train.shape[1:]),
    MaxPooling3D(pool_size=2, strides=(2, 2, 2), padding='same'),

    # Stage 2
    Conv3D(filters=64, kernel_size=(2, 3, 3), strides=(1, 1, 1), padding='valid', activation='relu'),
    MaxPooling3D(pool_size=2, strides=(2, 2, 2), padding='same'),

    # Stage 3
    Conv3D(filters=256, kernel_size=(2, 3, 3), strides=(1, 1, 1), padding='valid', activation='relu'),
    MaxPooling3D(pool_size=2, strides=(2, 2, 2), padding='same'),

    # Stage 4
    Conv3D(filters=1024, kernel_size=(2, 3, 3), strides=(1, 1, 1), padding='valid', activation='relu'),
    MaxPooling3D(pool_size=2, strides=(2, 2, 2), padding='same'),

    # Collapse the remaining time/height/width axes into one 1024-element vector
    GlobalAveragePooling3D(),

    # Hidden layer followed by dropout
    Dense(32, activation='relu'),
    Dropout(0.5),

    # One softmax probability per action class
    Dense(No_OF_CLASS, activation='softmax'),
])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv3d_1 (Conv3D)            (None, 20, 128, 128, 16)  736       
_________________________________________________________________
max_pooling3d_1 (MaxPooling3 (None, 10, 64, 64, 16)    0         
_________________________________________________________________
conv3d_2 (Conv3D)            (None, 9, 62, 62, 64)     18496     
_________________________________________________________________
max_pooling3d_2 (MaxPooling3 (None, 5, 31, 31, 64)     0         
_________________________________________________________________
conv3d_3 (Conv3D)            (None, 4, 29, 29, 256)    295168    
_________________________________________________________________
max_pooling3d_3 (MaxPooling3 (None, 2, 15, 15, 256)    0         
_________________________________________________________________
conv3d_4 (Conv3D)            (None, 1, 13, 13, 1024)   4719616   
__________

In [0]:
# Compiling the model: categorical cross-entropy matches the one-hot labels and softmax output
model.compile(loss='categorical_crossentropy', optimizer='nadam', metrics=['accuracy'])

# Saving the weights only when the monitored validation metric improves (save_best_only=True)
checkpoint = ModelCheckpoint(filepath='Model_x.weights.best.hdf5', save_best_only=True, verbose=1)

# Training the model for 3 epochs, validating on the held-out validation split after each epoch
history = model.fit(X_train, y_train, batch_size=16, epochs=3, 
                    validation_data=(X_valid, y_valid), verbose=2, callbacks=[checkpoint])

Train on 171 samples, validate on 57 samples
Epoch 1/3
 - 622s - loss: 6.7794 - acc: 0.2690 - val_loss: 10.7454 - val_acc: 0.3333

Epoch 00001: val_loss improved from inf to 10.74540, saving model to Model_x.weights.best.hdf5
Epoch 2/3
 - 624s - loss: 6.2010 - acc: 0.3567 - val_loss: 10.7454 - val_acc: 0.3333

Epoch 00002: val_loss did not improve from 10.74540
Epoch 3/3
 - 617s - loss: 5.4604 - acc: 0.3275 - val_loss: 10.7454 - val_acc: 0.3333

Epoch 00003: val_loss did not improve from 10.74540


In [0]:
# Restore the checkpointed weights (best epoch on the validation set)
model.load_weights('Model_x.weights.best.hdf5')

# Score the restored model on the held-out test data
test_metrics = model.evaluate(X_test, y_test, batch_size=16, verbose=0)
loss, accuracy = test_metrics

print('Accuracy on test data: {:.2f}%'.format(accuracy * 100))

Accuracy on test data: 23.68%


In [23]:
# Import under an alias: a bare `files` would overwrite the notebook's
# `files` list of video paths and break re-running the earlier cells.
from google.colab import files as colab_files

# Opens Colab's upload dialog; the result of `upload()` is kept in `uploaded`.
uploaded = colab_files.upload()