In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
import keras.layers as L
import pandas as pd
import cv2
from tqdm import tqdm, tqdm_notebook
import os

%matplotlib inline

Using TensorFlow backend.


In [2]:
data = pd.read_csv('data/train_gt.csv')
data.head()

Unnamed: 0,filename,classnum
0,0000.avi,56
1,0001.avi,51
2,0002.avi,64
3,0003.avi,23
4,0004.avi,41


In [3]:
class_num = []
file_name = []
for row in data.iterrows():
    name, label = row[1]['filename'], row[1]['classnum']
    idx = name.split('.')[0]
    file_name.append(idx)
    class_num.append(int(label))
class_num = np.array(class_num)
file_name = np.array(file_name)

In [13]:
from keras.layers.core import Dense, Dropout, Flatten, Activation
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers.cudnn_recurrent import CuDNNLSTM
from keras.layers.pooling import GlobalAveragePooling1D, GlobalAveragePooling2D
from keras.layers.merge import Average
from keras.models import Sequential, Model
from keras.layers.wrappers import TimeDistributed
from keras.layers import InputLayer, Input
from keras.layers import Bidirectional
from keras.optimizers import Adam, RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.xception import Xception, preprocess_input
from keras.utils import np_utils
from keras import backend as K
from keras.utils import multi_gpu_model

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
X_train, X_test, y_train, y_test = train_test_split(file_name, class_num)
X_train.shape, y_train.shape, X_train[:5], y_train[:5]

((7097,),
 (7097,),
 array(['4748', '4489', '5188', '2827', '4194'], dtype='<U4'),
 array([ 8, 31,  3, 32,  4]))

In [7]:
y_train = np_utils.to_categorical(y_train, class_num.max() + 1)
y_test = np_utils.to_categorical(y_test, class_num.max() + 1)

In [8]:
num_frames = []
for name in os.listdir('data/train_converted/imgs/'):
    num_frames.append(len(os.listdir('data/train_converted/imgs/' + name)))
np.max(num_frames), np.min(num_frames)

(1776, 30)

In [9]:
NUM_FRAMES = 10
BATCH_SIZE = 32
IMAGE_SHAPE = (240, 320, 3)
TENSOR_SHAPE = (NUM_FRAMES, ) + IMAGE_SHAPE
TRAIN_IMGS_PATH = 'data/train_converted/imgs/'
TRAIN_FLOW_PATH = 'data/train_converted/flow/'

In [10]:
def get_train_generator(data, label, batch_size=32, circle=True, shuffle_data=True, shuffle_frame=True):
    while True:
        indices = np.arange(len(data))
        if shuffle_data:
            indices = np.random.permutation(indices)
        for start in range(0, len(data), batch_size):
            batch_idx = indices[start : start + batch_size]
            y_batch = label[batch_idx]
            X_batch = np.zeros((y_batch.shape[0],) + TENSOR_SHAPE)
            for i, name in enumerate(data[batch_idx]):
                for k in range(NUM_FRAMES):
                    if not shuffle_frame:
                        start_frame = 0
                    else:
                        start_frame = np.random.randint(int(0.5 * len(os.listdir(TRAIN_IMGS_PATH + name))))
                    img = cv2.imread(TRAIN_IMGS_PATH + '{}/{}.jpg'.format(name, k + start_frame))
                    X_batch[i, k] = preprocess_input(img)
            yield X_batch, y_batch
        if not circle:
            break

In [11]:
def get_eval_generator(names, batch_size=32):
    for start in range(0, len(names), batch_size):
        sub_names = names[start : start + batch_size]
        X_batch = np.zeros((len(sub_names),) + TENSOR_SHAPE)
        for i, name in enumerate(sub_names):
            for k in range(NUM_FRAMES):
                start_frame = np.random.randint(int(0.5 * len(os.listdir('data/test_converted/imgs/' + name))))
                img = cv2.imread('data/test_converted/imgs/{}/{}.jpg'.format(name, k + start_frame))
                X_batch[i, k] = preprocess_input(img)
        yield X_batch

In [12]:
global MODEL
MODEL = Xception(include_top=False, weights='imagenet')
global GRAPH
GRAPH = tf.get_default_graph()

In [16]:
def get_rgb_model(lstm_size):
    base_model = TimeDistributed(MODEL)
    base_model(Input(shape=TENSOR_SHAPE))
    
    x = base_model.output
    x = TimeDistributed(GlobalAveragePooling2D())(x)
    x = Bidirectional(CuDNNLSTM(lstm_size, return_sequences=True))(x)
    x = TimeDistributed(Dense(101, activation='softmax'))(x)
    predictions = GlobalAveragePooling1D()(x)

    with tf.device('/cpu:0'):
        model = Model(inputs=base_model.input, outputs=predictions)

    for layer in base_model.layer.layers:
        layer.trainable = False

    parallel_model = multi_gpu_model(model)
    parallel_model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
    return parallel_model, model

In [17]:
super_model, model = get_rgb_model(512)

In [18]:
train_generator = get_train_generator(X_train, y_train, batch_size=BATCH_SIZE, circle=True, shuffle_data=True, shuffle_frame=True)
valid_generator = get_train_generator(X_test, y_test, batch_size=BATCH_SIZE, circle=True, shuffle_data=False, shuffle_frame=False)

In [19]:
valid_full_path_steps = np.ceil(len(X_test) / BATCH_SIZE)

In [20]:
super_model.fit_generator(train_generator, steps_per_epoch=100, epochs=10, validation_data=valid_generator, 
                          validation_steps=valid_full_path_steps, max_queue_size=30, workers=3, 
                          use_multiprocessing=True)

Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f94a8b603c8>

In [21]:
for l in model.layers[1].layer.layers[126:]:
        l.trainable = True
super_model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

In [22]:
BATCH_SIZE = 50

In [26]:
valid_full_path_steps = np.ceil(len(X_test) / BATCH_SIZE)

In [23]:
train_generator = get_train_generator(X_train, y_train, batch_size=BATCH_SIZE, circle=True, shuffle_data=True, shuffle_frame=True)
valid_generator = get_train_generator(X_test, y_test, batch_size=BATCH_SIZE, circle=True, shuffle_data=False, shuffle_frame=False)

In [24]:
super_model.fit_generator(train_generator, steps_per_epoch=100, epochs=5, validation_data=valid_generator, 
                          validation_steps=valid_full_path_steps, max_queue_size=30, workers=3, 
                          use_multiprocessing=True)



Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 5/5


<keras.callbacks.History at 0x7f94a8391ef0>

In [36]:
preds = []
for i in tqdm_notebook(range(10)):
    test_val_gen = get_train_generator(X_test, y_test, batch_size=BATCH_SIZE, circle=False, shuffle_data=False,
                                       shuffle_frame=True)
    prediction = super_model.predict_generator(test_val_gen, steps=valid_full_path_steps, verbose=1)
    preds.append(prediction)
    
final_preds = np.zeros_like(preds[0])
for prd in preds:
    final_preds += prd
final_preds /= len(preds)
total_prediction = np.argmax(final_preds, axis=1)
total_prediction.shape

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))



(2366,)

In [37]:
from sklearn.metrics import accuracy_score
accuracy_score(total_prediction, y_test.argmax(axis=1))

0.8672865595942519

In [29]:
vid_names = os.listdir('data/test_converted/imgs/')
test_full_pass_steps = np.ceil(len(vid_names) / BATCH_SIZE)
preds = []
for i in tqdm_notebook(range(10)):
    test_eval_gen = get_eval_generator(vid_names, batch_size=BATCH_SIZE)
    prediction = super_model.predict_generator(test_eval_gen, steps=test_full_pass_steps, max_queue_size=30, verbose=1)
    preds.append(prediction)
    
final_preds = np.zeros_like(preds[0])
for prd in preds:
    final_preds += prd
final_preds /= len(preds)
total_prediction = np.argmax(final_preds, axis=1)
total_prediction.shape

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))



(3729,)

In [34]:
sub = pd.DataFrame({'filename':['{}.avi'.format(name) for name in vid_names], 'classnum':total_prediction})
sub.head()

Unnamed: 0,filename,classnum
0,0690.avi,13
1,2654.avi,35
2,3132.avi,75
3,0931.avi,92
4,3689.avi,25


In [35]:
sub.to_csv('rgb_bid_lstm512_with_avg_pretty_tuned.csv', index=False)

In [33]:
model.save('rgb_bid_lstm512_with_avg_pretty_tuned.h5')