In [1]:
import cv2
import os
import numpy as np
import math


from keras.applications.vgg16 import VGG16
from keras.models import Model
from keras.layers import Dense, Input
from keras.layers.pooling import GlobalAveragePooling2D
from keras.layers.recurrent import LSTM
from keras.layers.wrappers import TimeDistributed
from keras.optimizers import Nadam
from keras.utils import to_categorical
from keras.models import load_model


Using TensorFlow backend.


In [2]:
def video_to_frames(video):
    # extract frames from a video and return a frame array
    vidcap = cv2.VideoCapture(video)
    frames = []
    while vidcap.isOpened():
        success, image = vidcap.read()
        
        if success:
            frames.append(image)
        else:
            break
    cv2.destroyAllWindows()
    vidcap.release()
    
    return np.array(frames)


In [3]:
frame_lens = []

for folder in os.listdir('dataset/train'):
    folder_path = 'dataset/train/' + folder
    for filename in os.listdir(folder_path):
        frames = video_to_frames(folder_path + '/' + filename)
        frame_lens.append(len(frames))

np.array(frame_lens).min()

88

In [3]:
train_data = []
train_label = []

label = 0
for folder in os.listdir('dataset/train'):
    folder_path = 'dataset/train/' + folder
    for filename in os.listdir(folder_path):
        frames = video_to_frames(folder_path + '/' + filename)
        frames = frames[math.floor(len(frames)/2) - 44 : math.floor(len(frames)/2) + 44]
        print(filename, label)
        train_data.append(frames)
        train_label.append(label)

    label += 1

train_data = np.array(train_data)
train_label = np.array(train_label)

horizontalview_p22_faint_a1.avi 0
horizontalview_p01_faint_a1.avi 0
horizontalview_p24_faint_a1.avi 0
horizontalview_p15_faint_a1.avi 0
horizontalview_p18_faint_a1.avi 0
horizontalview_p19_faint_a1.avi 0
horizontalview_p09_faint_a1.avi 0
horizontalview_p08_faint_a1.avi 0
horizontalview_p02_faint_a1.avi 0
horizontalview_p17_faint_a1.avi 0
horizontalview_p16_faint_a1.avi 0
horizontalview_p07_wonder_a1.avi 1
horizontalview_p03_wonder_a1.avi 1
horizontalview_p07_wonder_a2.avi 1
horizontalview_p03_wonder_a2.avi 1
horizontalview_p04_wonder_a2.avi 1
horizontalview_p04_wonder_a1.avi 1
horizontalview_p08_wonder_a2.avi 1
horizontalview_p08_wonder_a1.avi 1
horizontalview_p01_wonder_a2.avi 1
horizontalview_p05_wonder_a2.avi 1
horizontalview_p01_wonder_a1.avi 1
horizontalview_p05_wonder_a1.avi 1
horizontalview_p02_wonder_a1.avi 1
horizontalview_p06_wonder_a1.avi 1
horizontalview_p02_wonder_a2.avi 1
horizontalview_p06_wonder_a2.avi 1
horizontalview_p02_car_a1.avi 2
horizontalview_p24_car_a1.avi 2
ho

In [5]:
train_label = to_categorical(train_label, num_classes=label)

In [4]:
test_data = []
test_label = []

label = 0
for folder in os.listdir('dataset/test'):
    folder_path = 'dataset/test/' + folder
    for filename in os.listdir(folder_path):
        frames = video_to_frames(folder_path + '/' + filename)
        if len(frames) > 88:
            frames = frames[math.floor(len(frames)/2) - 44 : math.floor(len(frames)/2) + 44]
        
        new_frames = np.zeros([max(len(frames), 88), 240,320,3])
        for i in range(len(frames)):
            new_frames[i] = cv2.resize(frames[i], dsize=(320, 240), interpolation=cv2.INTER_CUBIC)
        
        print(filename, label)
        test_data.append(new_frames)
        test_label.append(label)

    label += 1

test_data = np.array(test_data)
test_label = np.array(test_label)


faint_test_3.mp4 0
faint_test_2.mp4 0
faint_test_1.mp4 0
walk_test_2.mp4 3
walk_test_3.mp4 3
walk_test_1.mp4 3
crouch_test_2.mp4 4
crouch_test_3.mp4 4
crouch_test_1.mp4 4
bend_test_2.mp4 5
bend_test_1.mp4 5
jump_test_1.mp4 6
jump_test_3.mp4 6
jump_test_2.mp4 6
jump_test_4.mp4 6
run_test_1.mp4 7
run_test_3.mp4 7
run_test_2.mp4 7


In [5]:
test_label = to_categorical(test_label, num_classes=label)

In [6]:
del frames
del new_frames

In [13]:
classes = label
frames, rows, columns, channels = train_data.shape[1], train_data.shape[2], train_data.shape[3], train_data.shape[4]

video = Input(shape=(frames, rows, columns, channels))
cnn_base = VGG16(input_shape=(rows, columns, channels), weights="imagenet", include_top=False)
cnn_out = GlobalAveragePooling2D()(cnn_base.output)
cnn = Model(input=cnn_base.input, output=cnn_out)
cnn.trainable = False
encoded_frames = TimeDistributed(cnn)(video)
encoded_sequence = LSTM(256)(encoded_frames)
hidden_layer = Dense(output_dim=1024, activation="relu")(encoded_sequence)
outputs = Dense(output_dim=classes, activation="softmax")(hidden_layer)
model = Model([video], outputs)
optimizer = Nadam(lr=0.002,
                  beta_1=0.9,
                  beta_2=0.999,
                  epsilon=1e-08,
                  schedule_decay=0.004)
model.compile(loss="categorical_crossentropy",
              optimizer=optimizer,
              metrics=["categorical_accuracy"]) 

  import sys
  # This is added back by InteractiveShellApp.init_path()
  if sys.path[0] == '':


In [10]:
history = model.fit(train_data, train_label, epochs=3, batch_size=4)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [11]:
model.save(filepath='my_model.h5')

In [8]:
model = load_model('my_model.h5')

In [9]:
prediction = model.predict(test_data)

In [10]:
for p in prediction:
    print(p.argmax())

7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7


In [11]:
test_data.shape

(112, 88, 240, 320, 3)

In [25]:
test_data.shape

(18, 88, 240, 320, 3)

In [13]:
train_label

array([[1., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 

In [17]:
for i in range(len(test_data)):
    print(i, test_data[i].shape)

0 (88, 240, 320, 3)
1 (88, 240, 320, 3)
2 (88, 240, 320, 3)
3 (88, 240, 320, 3)
4 (88, 240, 320, 3)
5 (88, 240, 320, 3)
6 (88, 240, 320, 3)
7 (88, 240, 320, 3)
8 (88, 240, 320, 3)
9 (88, 240, 320, 3)
10 (88, 240, 320, 3)
11 (88, 240, 320, 3)
12 (88, 240, 320, 3)
13 (88, 240, 320, 3)
14 (88, 240, 320, 3)
15 (7, 240, 320, 3)
16 (88, 240, 320, 3)
17 (88, 240, 320, 3)


In [11]:
label

8

In [14]:
model.summary()

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 88, 240, 320, 3)   0         
_________________________________________________________________
time_distributed_1 (TimeDist (None, 88, 512)           14714688  
_________________________________________________________________
lstm_1 (LSTM)                (None, 256)               787456    
_________________________________________________________________
dense_1 (Dense)              (None, 1024)              263168    
_________________________________________________________________
dense_2 (Dense)              (None, 8)                 8200      
Total params: 15,773,512
Trainable params: 15,773,512
Non-trainable params: 0
_________________________________________________________________


In [15]:
TimeDistributed?