In [1]:
from os.path import join
import itertools
import threading
import gc

import keras.backend as K
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint
from sklearn.metrics import mean_squared_error
from keras.optimizers import Nadam, Adam

import numpy as np
import cv2

def load_ground_truth(path="./data/train.txt"):
    """Read one float label per line from a text file.

    Parameters
    ----------
    path : str, optional
        Text file holding one numeric value per line. Defaults to the
        training label file used by this notebook.

    Returns
    -------
    list of float
        Parsed values in file order. Blank (whitespace-only) lines are
        skipped.

    Raises
    ------
    ValueError
        If a non-blank line cannot be parsed as a float.
    """
    with open(path) as f:
        # Iterate the handle directly instead of materialising readlines();
        # whitespace-only lines are dropped so they never reach float('').
        return [float(line) for line in f if line.strip()]

# Ground-truth targets as a NumPy array, one entry per line of the label file.
y = np.asarray(load_ground_truth())

Using TensorFlow backend.


In [2]:
from keras.layers.core import Dense, Dropout, Flatten
from keras.layers import Conv3D, MaxPooling3D, ZeroPadding3D
from keras.models import Sequential
from keras.models import Model

# from kerasmodelzoo.utils.data import download_file, load_np_data
# https://github.com/albertomontesg/keras-model-zoo/blob/master/kerasmodelzoo/models/c3d.py
# https://github.com/axon-research/c3d-keras/blob/master/test_model.py
#_C3D_WEIGHTS_URL = 'https://www.dropbox.com/s/ypiwalgtlrtnw8b/c3d-sports1M_weights.h5?dl=1'


'''
dim_ordering issue:
- 'th'-style dim_ordering: [batch, channels, depth, height, width]
- 'tf'-style dim_ordering: [batch, depth, height, width, channels]
'''

def get_model(weight_file=None):
    """Build the C3D network with a single-unit linear regression head.

    The complete classification stack (ending in the 487-way softmax
    ``fc8``) is assembled first so the pretrained weight file can be loaded
    into it. The returned model is built on the same layers, from the input
    up to ``fc7``, followed by a new ``Dense(1)`` layer named
    ``regression``.

    Parameters
    ----------
    weight_file : str or None
        When None, "./sports1M_weights_tf.h5" is loaded into the
        classification stack. Otherwise ``weight_file`` is loaded into the
        regression model.

    Returns
    -------
    Model
        Regression model taking inputs of shape (16, 112, 112, 3)
        (length, height, width, channels).
    """
    def conv(filters, name, **extra):
        # Every convolution in the stack uses a 3x3x3 kernel, ReLU and 'same' padding.
        return Conv3D(filters, (3, 3, 3), activation='relu',
                      padding='same', name=name, **extra)

    def pool(name, depth=2):
        # Pool size and stride are always equal; only pool1 keeps the depth axis.
        window = (depth, 2, 2)
        return MaxPooling3D(pool_size=window, strides=window,
                            padding='valid', name=name)

    clip_shape = (16, 112, 112, 3)  # l, h, w, c
    stack = [
        # 1st layer group
        conv(64, 'conv1', input_shape=clip_shape),
        pool('pool1', depth=1),
        # 2nd layer group
        conv(128, 'conv2'),
        pool('pool2'),
        # 3rd layer group
        conv(256, 'conv3a'),
        conv(256, 'conv3b'),
        pool('pool3'),
        # 4th layer group
        conv(512, 'conv4a'),
        conv(512, 'conv4b'),
        pool('pool4'),
        # 5th layer group
        conv(512, 'conv5a'),
        conv(512, 'conv5b'),
        ZeroPadding3D(padding=((0, 0), (0, 1), (0, 1)), name='zeropad5'),
        pool('pool5'),
        Flatten(),
        # FC layers group
        Dense(4096, activation='relu', name='fc6'),
        Dropout(.5),
        Dense(4096, activation='relu', name='fc7'),
        Dropout(.5),
        Dense(487, activation='softmax', name='fc8'),
    ]

    backbone = Sequential()
    for layer in stack:
        backbone.add(layer)

    # The regression head branches off fc7's output.
    head = Dense(1, activation='linear', name='regression')(
        backbone.get_layer('fc7').output)
    regressor = Model(inputs=backbone.input, outputs=head)

    if weight_file is not None:
        regressor.load_weights(weight_file)
    else:
        backbone.load_weights("./sports1M_weights_tf.h5")

    return regressor

# https://github.com/axon-research/c3d-keras/blob/master/models/get_weights_and_mean.sh
# Move the stored array's leading axis to the end
# (presumably channels-first -> channels-last; confirm against the .npy file).
c3d_mean = np.load('c3d_mean.npy').transpose(1, 2, 3, 0)


In [3]:
# fix weight
# Build the network with its default weights, then freeze the first 14 entries
# of the layer list; the remaining layers stay trainable.
c3d_model = get_model()
for frozen_layer in c3d_model.layers[:14]:
    frozen_layer.trainable = False
optimizer = Nadam()
c3d_model.compile(loss='mean_squared_error', optimizer=optimizer)

In [None]:
class InputGenerator(object):
    """Endless, thread-safe batch iterator over 16-frame clips.

    For every centre index ``i`` in ``[8, n - 8)`` (``n = X.shape[0]``) the
    sample is built from frames ``i - 8 .. i + 7``: the optional ``mean`` is
    subtracted and a centred 112x112 window is cropped. The target and
    sample weight, when given, are taken at index ``i``.

    Parameters
    ----------
    X : ndarray
        Frames indexed as (frame, height, width, channels).
    Y : ndarray or None
        Per-frame targets. When None, batches contain inputs only.
    sample_weight : ndarray or None
        Per-frame weights, returned as a third batch element when given.
    mean : ndarray or None
        Array subtracted from each uncropped 16-frame clip. Skipped when None.
    shuffle : bool
        Reshuffle the centre indices at the start of every epoch.
    batch_size : int
        Maximum number of clips per batch (the last batch may be smaller).
    random_state : numpy.random.RandomState or None
        Source of randomness for shuffling. A fresh one is created when None.
    """

    HALF_WINDOW = 8   # frames taken before the centre index; clip length is twice this
    CROP_SIZE = 112   # side of the centred spatial crop

    def __init__(self, X, Y=None, sample_weight=None, mean=None,
                shuffle=False, batch_size=32, random_state=None):
        self.X = X
        self.Y = Y
        # Guards the shared index generator when several threads call next().
        self.lock = threading.Lock()
        if random_state is None:
            random_state = np.random.RandomState()
        self.index_generator = self._flow_index(X.shape[0], batch_size, shuffle,
                random_state)
        self.sample_weight = sample_weight
        self.mean = mean
        self.batch_size = batch_size

    def __len__(self):
        """Number of batches that make up one pass over all clips."""
        n_clips = max(0, self.X.shape[0] - 2 * self.HALF_WINDOW)
        return -(-n_clips // self.batch_size)  # ceiling division

    def __iter__(self):
        return self

    def __next__(self, *args, **kwargs):
        return self.next(*args, **kwargs)

    def _flow_index(self, n, batch_size, shuffle, random_state):
        """Yield ``(epoch, centre_indices)`` forever, one batch at a time.

        Batches are cut by position in the array of valid centre indices,
        starting at position 0, so every centre in ``[8, n - 8)`` appears
        exactly once per epoch and no batch is empty.
        """
        index = np.arange(self.HALF_WINDOW, n - self.HALF_WINDOW)
        if index.size == 0:
            # No complete clip fits; end instead of spinning forever.
            return
        for epoch_i in itertools.count():
            if shuffle:
                random_state.shuffle(index)
            for batch_start in range(0, index.size, batch_size):
                # Copy: ``index`` is shuffled in place on the next epoch, and a
                # consumer thread may still be using an earlier batch.
                yield epoch_i, index[batch_start:batch_start + batch_size].copy()

    def next(self):
        """Return the next batch.

        Returns
        -------
        ndarray or tuple
            ``batch_X`` alone when ``Y`` is None, else ``(batch_X, batch_Y)``
            or ``(batch_X, batch_Y, batch_weight)`` when sample weights were
            given. ``batch_X`` is float32 with shape
            (batch, 16, 112, 112, channels).
        """
        # Only the index draw is serialised; array work runs outside the lock.
        with self.lock:
            _, index_array = next(self.index_generator)

        half = self.HALF_WINDOW
        crop = self.CROP_SIZE
        # Frames are stored (height, width, channels).
        img_height, img_width = self.X.shape[1:3]
        top = (img_height - crop) // 2
        left = (img_width - crop) // 2

        batch_X = []
        for i in index_array:
            clip = self.X[i - half:i + half]
            if self.mean is not None:
                clip = clip - self.mean
            batch_X.append(clip[:, top:top + crop, left:left + crop, :])
        batch_X = np.array(batch_X, dtype=np.float32)

        if self.Y is None:
            return batch_X
        batch_Y = self.Y[index_array]
        if self.sample_weight is not None:
            return batch_X, batch_Y, self.sample_weight[index_array]
        return batch_X, batch_Y
            
            
def pred_seq(X, model, batch_size=16):
    """Predict one value per frame of ``X`` with ``model``.

    Clips are produced by ``InputGenerator`` (unshuffled, using the
    module-level ``c3d_mean``). The generator skips 8 frames at each end of
    the sequence, so the first and last predictions are each repeated 8
    times as padding.

    Parameters
    ----------
    X : ndarray
        Frame sequence passed to ``InputGenerator``.
    model : object
        Model exposing ``predict_generator``.
    batch_size : int, optional
        Clips per prediction batch. It is a parameter so the function no
        longer reads a ``batch_size`` global defined in a later cell.

    Returns
    -------
    ndarray
        1-D array: the model predictions plus 8 padding values at each end.
    """
    generator = InputGenerator(X, mean=c3d_mean, batch_size=batch_size)
    pred = model.predict_generator(generator, steps=len(generator), verbose=1)
    pred = pred.reshape(-1)
    head = np.ones(8) * pred[0]
    tail = np.ones(8) * pred[-1]
    return np.concatenate((head, pred, tail))

def evaluation(X, y, model):
    """Return the mean squared error between ``y`` and ``pred_seq(X, model)``."""
    return mean_squared_error(y, pred_seq(X, model))

In [None]:
# load training data
# Frames are numbered from 1 and resized to 171x128 (width x height).
# The float32 array is allocated once instead of growing a list of frames and
# converting it afterwards.
X = np.empty((20400, 128, 171, 3), dtype=np.float32)
for i in range(20400):
    frame_path = join("/tmp/frames/train/", f"{i+1:08d}.png")
    img = cv2.imread(frame_path, 1)
    if img is None:
        # cv2.imread returns None rather than raising when a file is missing
        # or unreadable; fail here with the offending path.
        raise FileNotFoundError(f"could not read frame: {frame_path}")
    X[i] = cv2.resize(img, (171, 128))

In [54]:
batch_size = 16

# Frames [0, 18000) are used for fitting; the remaining frames for validation.
trn_gen = InputGenerator(
    X[:18000], y[:18000],
    mean=c3d_mean, batch_size=batch_size, shuffle=True,
)
val_gen = InputGenerator(
    X[18000:], y[18000:],
    mean=c3d_mean, batch_size=batch_size,
)

# Checkpoint file names embed the epoch number and the validation loss.
checkpoint_callback = ModelCheckpoint("./weights/c3d-{epoch:02d}-{val_loss:.2f}.hdf5")

c3d_model.fit_generator(
    trn_gen,
    steps_per_epoch=len(trn_gen),
    epochs=5,
    callbacks=[checkpoint_callback],
    validation_data=val_gen,
    validation_steps=len(val_gen),
    max_queue_size=50,
    workers=6,
    use_multiprocessing=False,
)

Epoch 1/5
Epoch 2/5


ValueError: Error when checking input: expected conv1_input to have 5 dimensions, but got array with shape (0, 1)

In [55]:
# evaluation() takes the model as its third argument; pass the trained network.
print("trn err: ", evaluation(X[:18000], y[:18000], c3d_model))
print("val err: ", evaluation(X[18000:], y[18000:], c3d_model))

trn err:  40.09566379024576
val err:  10.128267973933534


In [9]:
# finetune with smaller learning rate
# Release what the previous model held before rebuilding the network.
gc.collect()
K.clear_session()
# NOTE(review): "./weights/c3d" does not match the checkpoint naming pattern
# used by the training cell; confirm the intended weight file.
c3d_model = get_model("./weights/c3d")

# Mark the first 14 entries of the layer list as trainable for this stage.
for tunable_layer in c3d_model.layers[:14]:
    tunable_layer.trainable = True
optimizer = Adam(lr=1e-8)
c3d_model.compile(loss='mean_squared_error', optimizer=optimizer)

In [None]:
batch_size = 16

# Same split as the first stage: frames [0, 18000) for fitting, the rest for validation.
trn_gen = InputGenerator(
    X[:18000], y[:18000],
    mean=c3d_mean, batch_size=batch_size, shuffle=True,
)
val_gen = InputGenerator(
    X[18000:], y[18000:],
    mean=c3d_mean, batch_size=batch_size,
)

# Checkpoint file names embed the epoch number and the validation loss.
checkpoint_callback = ModelCheckpoint("./weights/c3d_fintune-{epoch:02d}-{val_loss:.2f}.hdf5")

c3d_model.fit_generator(
    trn_gen,
    steps_per_epoch=len(trn_gen),
    epochs=5,
    callbacks=[checkpoint_callback],
    validation_data=val_gen,
    validation_steps=len(val_gen),
    max_queue_size=50,
    workers=6,
    use_multiprocessing=False,
)

Epoch 1/5

In [70]:
# The network lives in ``c3d_model``; no variable named ``model`` is defined
# anywhere in this notebook, so the original call raised NameError.
pred_trn = pred_seq(X[:18000], c3d_model)
pred_val = pred_seq(X[18000:], c3d_model)

print("trn err: ", mean_squared_error(y[:18000], pred_trn))
print("val err: ", mean_squared_error(y[18000:], pred_val))

In [71]:
# Load the test frames the same way as the training frames
# (numbered from 1, resized to 171x128).
# NOTE(review): the frame count 20400 is copied from the training cell;
# confirm the test set really has that many frames.
tstX = np.empty((20400, 128, 171, 3), dtype=np.float32)
for i in range(20400):
    frame_path = join("/tmp/frames/test/", f"{i+1:08d}.png")
    img = cv2.imread(frame_path, 1)
    if img is None:
        # cv2.imread returns None rather than raising when a file is missing
        # or unreadable; fail here with the offending path.
        raise FileNotFoundError(f"could not read frame: {frame_path}")
    tstX[i] = cv2.resize(img, (171, 128))

# The network lives in ``c3d_model``; ``c3dmodel`` is not defined anywhere.
pred_tsty = pred_seq(tstX, c3d_model)
with open("test.txt", "w") as f:
    f.write("\n".join(str(value) for value in pred_tsty))

KeyboardInterrupt: 