In [1]:
import numpy as np
import keras
import keras.backend as K
from prednet import PredNet

Using TensorFlow backend.


## Build model

data_format='channels_first'の場合は次のshapeの5階テンソル：(samples, time, channels, rows, cols)

In [2]:
class PredNet( ):
    def __init__(self, T=40, L=10, img_shape=(64,64,3)):
        self.T = T
        self.L = L
        self.img_h = img_shape[0]
        self.img_w = img_shape[1]
        self.img_c = img_shape[2]
        
        self.x_shape = (self.T, 1, self.img_h, self.img_w, self.img_c)
        self.e_shape = (self.T, self.L, self.img_h, self.img_w, self.img_c*2)
        self.r_shape = (self.T, self.L, self.img_h, self.img_w, self.img_c)
        
    def build_train(self):
        # Input Lauer
        # T: 50, L: 10, h:64, w:64, c:3
        X_input = keras.layers.Input(shape=self.x_shape, dtype='float32', name='x_input')
        E_input = keras.layers.Input(shape=self.e_shape, dtype='float32', name='e_input')
        R_input = keras.layers.Input(shape=self.r_shape, dtype='float32', name='r_input')
        # print("L:", self.L, "T:", self.T)

        for t in range(self.T):

            """ === E unit === """
            if t == 0:
                E = E_input

            def E_to_Et(E):
                # E_t: (None, L, h, w, 2c)
                E_t = K.permute_dimensions(E, [1, 0, 2, 3, 4, 5])
                E_t = K.gather(E_t, [t])
                E_t = K.permute_dimensions(E_t, [1, 0, 2, 3, 4, 5])
                E_t = K.squeeze(E_t, axis=1)
                return E_t
            E_t = keras.layers.Lambda(E_to_Et)(E)

            """ === R unit === """
            if t ==0:
                def R_to_Rt(R_input):
                    R_t = K.permute_dimensions(R_input, [1, 0, 2, 3, 4, 5])
                    R_t = K.gather(R_t, [t])
                    R_t = K.permute_dimensions(R_t, [1, 0, 2, 3, 4, 5])
                    R_t = K.squeeze(R_t, axis=1)
                    return R_t
                R_t = keras.layers.Lambda(R_to_Rt)(R_input)
            else:
                # R_t: (None, L, h, w, 3)
                E_t_rev = keras.layers.Lambda(lambda x: K.reverse(x, axes=1))(E_t)
                R_t, state_h_t, state_c_t = keras.layers.ConvLSTM2D(3, (3, 3), 
                                                                    padding='same', 
                                                                    activation='tanh', 
                                                                    return_sequences=True, 
                                                                    return_state=True)(E_t_rev)

            
            for l in range(self.L):  
                """ === R_tl === """
                def Rt_to_Rtl(R_t):
                    R_tl = K.permute_dimensions(R_t, [1, 0, 2, 3, 4])
                    R_tl = K.gather(R_tl, [l])
                    R_tl = K.permute_dimensions(R_tl, [1, 0, 2, 3, 4])
                    R_tl = K.squeeze(R_tl, axis=1)
                    return R_tl
                def Rt_to_Rtl_shape(input_shape):
                    output_shape = (input_shape[0],) + input_shape[-3:]
                    return output_shape

                # R_tl: (None, h, w, 3)
                R_tl = keras.layers.Lambda(Rt_to_Rtl, Rt_to_Rtl_shape)(R_t)                
                
                """ === Ahat_tl === """
                # Ahat_tl: (None, h, w, 3)
                Ahat_tl = keras.layers.Conv2D(3, (3, 3), padding='same')(R_tl)
                Ahat_tl = keras.layers.Activation('relu')(Ahat_tl)

                if l == 0:                    
                    def X_to_Atl(x_input):
                        # (None, T, 1, h, w, c) --> (?, h, w, c)
                        A_tl = K.squeeze(x_input, axis=2) # (None, T, 64, 64, 3)
                        A_tl = K.permute_dimensions(A_tl, [1, 0, 2, 3, 4]) # (T, None, 64, 64, 3)
                        A_tl = K.gather(A_tl, [t]) # (1, None, 64, 64, 3)
                        A_tl = K.squeeze(A_tl, axis=0) # (None, 64, 64, 3)
                        return A_tl
                    def X_to_Atl_shape(input_shape):
                        output_shape = (input_shape[0],) + input_shape[-3:]
                        return output_shape

                    # A_tl: (None, h, w, 3)
                    A_tl = keras.layers.Lambda(X_to_Atl)(X_input)
                    
                    
                    
                # E_tl: (None, h, w, 3)
                err0_tl = keras.layers.Subtract()([A_tl, Ahat_tl])
                err0_tl = keras.layers.Activation('relu')(err0_tl)
                err1_tl = keras.layers.Subtract()([Ahat_tl, A_tl])
                err1_tl = keras.layers.Activation('relu')(err1_tl)
                E_tl =  keras.layers.Concatenate(axis=-1)([err0_tl, err1_tl])

                # A_tl: (None, h, w, 3)
                if l != self.L-1:
                    def Et_to_Etl(E_t):
                        E_tl = K.permute_dimensions(E_t, [1, 0, 2, 3, 4])
                        E_tl = K.gather(E_tl, [l])
                        E_tl = K.permute_dimensions(E_tl, [1, 0, 2, 3, 4])
                        E_tl = K.squeeze(E_tl, axis=1)
                        return E_tl

                    # E_tl: (None, h, w, 3)
                    E_tl = keras.layers.Lambda(Et_to_Etl)(E_t)
                    # A_tl
                    A_tl = keras.layers.Conv2D(3, (3,3), padding='same')(E_tl)

                if l == 0:
                    E_l = keras.layers.Lambda(lambda x: K.expand_dims(x, axis=1))(E_tl)
                else:                
                    E_tl_ = keras.layers.Lambda(lambda x: K.expand_dims(x, axis=1))(E_tl)
                    E_l = keras.layers.Concatenate(axis=1)([E_l, E_tl_])
            if t == 0:
                E = keras.layers.Lambda(lambda x : K.expand_dims(x, axis=1))(E_l)
            else:
                E_l_ = keras.layers.Lambda(lambda x : K.expand_dims(x, axis=1))(E_l)
                E = keras.layers.Concatenate(axis=1)([E, E_l_])


        # print(E) # 50, 10, 64, 64, 6
        model_train = keras.models.Model(inputs=[X_input, E_input, R_input], outputs=[E])
        return model_train

In [34]:
model_train = PredNet(T=40, L=10, img_shape=(64, 64, 3)).build_train()

In [None]:
model_train.compile(optimizer='sgd', loss='mean_squared_error')
model_train.summary()

In [3]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import keras
import numpy as np
import pandas as pd
from copy import deepcopy
from PIL import Image
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

class PredNetBatchGenerator(keras.utils.Sequence):

    def __init__(self, video_path="./data/video/20bn-jester-v1",
                 img_size=(64, 64), 
                 batch_size=1,
                 max_frame_size=40,
                 use_padding=True):
        
        # Load CSV as pd.DataFrame
        self.df_labels = pd.read_csv("./data/labels.csv", header=None, names=["jester name"])
        self.df_labels['label'] = self.df_labels.index.to_series()
        self.df_train = pd.read_csv("./data/train.csv", sep=";", header=None, names=["frame_id", "jester name"])
        self.df_train = pd.merge(self.df_train, self.df_labels, how="left", on = "jester name")

        # debug 
        # TODO: delete this line
        self.df_train = self.df_train[:10000]
        self.max_frame_size = max_frame_size
        
        self.video_path = video_path
        self.num = len(self.df_train)
        self.batch_size = batch_size
        self.img_size = img_size
        self.use_padding = use_padding
        self.batches_per_epoch = int((self.num - 1) / batch_size) + 1

    def __getitem__(self, idx):
        """
        idx: batch id
        """
        batch_from = self.batch_size * idx
        batch_to = batch_from + self.batch_size

        if batch_to > self.num:
            batch_to = self.num

        x_batch = []
        y_batch = []
        
        for index, row in self.df_train[batch_from:batch_to].iterrows(): 
            video=[]
            for i, img_filename in enumerate(os.listdir(os.path.join(self.video_path, str(row["frame_id"])))):
                img_path = os.path.join(self.video_path, str(row["frame_id"]), str(img_filename))
                img_pil = Image.open(img_path).resize(self.img_size)
                img_arr = np.array(img_pil)
                video.append(img_arr)
            video = np.array(video)

            x_batch.append(video)
            y_batch.append(row["label"])


        # Reverce list 
        x_batch_r = deepcopy(x_batch)
        x_batch_r.reverse()

        # Zero padding
        if self.use_padding:
            x_batch   = self._zero_padding(x_batch, self.max_frame_size)
            x_batch_r = self._zero_padding(x_batch_r, self.max_frame_size)

        x_batch = np.asarray(x_batch)
        x_batch = x_batch.astype('float32') / 255.0
        x_batch_r = np.asarray(x_batch_r)
        x_batch_r = x_batch_r.astype('float32') / 255.0
        # y_batch = np.asarray(y_batch)
        
        x_batch = np.expand_dims(x_batch, axis=2)
        z_batch = np.zeros_like(x_batch)
        
        e_batch = np.zeros(x_batch[:2] + (self.max_frame_size,) + x_batch[-3:])
        r_batch = np.zeros(x_batch[:2] + (self.max_frame_size,) + x_batch[-3:])
        
        
        
        # videos, videos
        return [x_batch, e_batch, r_batch], [z_batch]

    def _zero_padding(self, videos, max_frame_size):
        videos_pad=[]
        for v in videos:
            if v.shape[0] < max_frame_size:
                diff = max_frame_size - v.shape[0]
                v_pad = np.pad(v, [(0,diff),(0,0),(0,0),(0,0)], 'constant')
                videos_pad.append(v_pad)
            else:
                videos_pad.append(v)
        return videos_pad

    def __len__(self):
        """
        batch length: 1epochのバッチ数
        """
        return self.batches_per_epoch

    def __getlabel__(self, idx):
        batch_from = self.batch_size * idx
        batch_to = batch_from + self.batch_size

        if batch_to > self.num:
            batch_to = self.num

        label_batch = []
        for index, row in self.df_train[batch_from:batch_to].iterrows(): 
            label_batch.append(row["label"])

        return np.array(label_batch)

    def on_epoch_end(self):
        # 1epochが終わった時の処理
        pass


In [4]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import datetime
import keras
import numpy as np

model_train = PredNet(T=40, L=10, img_shape=(64, 64, 3)).build_train()
model_train.compile(optimizer='sgd', loss='mean_squared_error')
# model_train.summary()


train_batch_generator = PredNetBatchGenerator(video_path="./data/video/20bn-jester-v1",
                                              img_size=(48, 48), 
                                              batch_size=4,
                                              max_frame_size=40,
                                              use_padding=True)

date_string = "prednet_"+datetime.datetime.now().strftime('%Y%m%d %H:%M:%S')
os.mkdir('./log/'+date_string)
print("model logdir :", "./log/"+date_string)

callbacks=[]
callbacks.append(keras.callbacks.CSVLogger(filename='./log/'+date_string+'/metrics.csv'))
callbacks.append(keras.callbacks.ModelCheckpoint(filepath='./log/'+date_string+'/bestweights.hdf5', 
                                                    monitor='loss', 
                                                    save_best_only=True))

history= model_train.fit_generator(train_batch_generator, 
                                  steps_per_epoch=train_batch_generator.__len__(), 
                                  epochs=100, 
                                  verbose=1, 
                                  callbacks=callbacks, 
                                  validation_data=None, 
                                  validation_steps=None, 
                                  class_weight=None, 
                                  max_queue_size=1, 
                                  workers=4,
                                  use_multiprocessing=False, 
                                  shuffle=False, 
                                  initial_epoch=0)


Instructions for updating:
Colocations handled automatically by placer.
model logdir : ./log/prednet_20191126 17:09:51
Instructions for updating:
Use tf.cast instead.


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/100


NameError: name 'x' is not defined

In [67]:
bg = PredNetBatchGenerator(batch_size=4)

In [68]:
x_batch, z_batch = bg.__getitem__(1)

In [69]:
x_batch.shape

(4, 40, 1, 64, 64, 3)

In [72]:
x = (4, 40, 1, 64, 64, 3)
x

(4, 40, 1, 64, 64, 3)

(4, 40, 3, 64, 64, 3)

In [80]:
t = 12

e_input = K.permute_dimensions(e_input, [1, 0, 2, 3, 4, 5])
e_input = K.gather(e_input, [t])
e_input = K.permute_dimensions(e_input, [1, 0, 2, 3, 4, 5])

In [81]:
e_input

<tf.Tensor 'transpose_3:0' shape=(?, 1, 10, 64, 64, 6) dtype=float32>

In [43]:
# A_tl: (None, h, w, 3)
def X_to_Atl(x_input):
    A_tl = K.squeeze(x_input, axis=2) # (None, T, 64, 64, 3)
    A_tl = K.permute_dimensions(X, [1, 0, 2, 3, 4, 5]) # (T, None, 64, 64, 3)
    A_tl = K.gather(A_tl, [t]) # (1, None, 64, 64, 3)
    A_tl = K.squeeze(A_tl, axis=0) # (None, 64, 64, 3)
    return A_tl
def X_to_Atl_shape(input_shape):
    output_shape = (input_shape[0],) + input_shape[-3:]
    return output_shape

A_tl = keras.layers.Lambda(X_to_Atl, X_to_Atl_shape)(X_input)

In [None]:
R = keras.layers.wrappers.TimeDistributed(
    keras.layers.ConvLSTM2D(3, (3, 3), return_sequences=True, return_state=True))(x_input)

### Original

In [19]:
nt = 10  # number of timesteps used for sequences in training

# Model parameters
n_channels, im_height, im_width = (3, 128, 160)
input_shape = (n_channels, im_height, im_width) if K.image_data_format() == 'channels_first' else (im_height, im_width, n_channels)

stack_sizes = (n_channels, 48, 96, 192)
R_stack_sizes = stack_sizes
A_filt_sizes = (3, 3, 3)
Ahat_filt_sizes = (3, 3, 3, 3)
R_filt_sizes = (3, 3, 3, 3)

layer_loss_weights = np.array([1., 0., 0., 0.])  # weighting for each layer in final loss; "L_0" model:  [1, 0, 0, 0], "L_all": [1, 0.1, 0.1, 0.1]
layer_loss_weights = np.expand_dims(layer_loss_weights, 1)

time_loss_weights = 1./ (nt - 1) * np.ones((nt,1))  # equally weight all timesteps except the first
time_loss_weights[0] = 0.


print("layer_loss_weights:", layer_loss_weights.shape)
print("time_loss_weights :", time_loss_weights.shape)

inputs = keras.layers.Input(shape=(nt,) + input_shape)
errors = prednet(inputs)  # errors will be (batch_size, nt, nb_layers)

errors_by_time = keras.layers.wrappers.TimeDistributed(
    keras.layers.Dense(1, trainable=False), 
    weights=[layer_loss_weights, np.zeros(1)], 
    trainable=False)(errors)  # calculate weighted error by layer
errors_by_time = keras.layers.Flatten()(errors_by_time)  # will be (batch_size, nt)

final_errors = keras.layers.Dense(1, weights=[time_loss_weights, np.zeros(1)], trainable=False)(errors_by_time)  # weight errors by time

# model
model = keras.models.Model(inputs=inputs, outputs=final_errors)
model.compile(loss='mean_absolute_error', optimizer='adam')
model.summary()

layer_loss_weights: (4, 1)
time_loss_weights : (10, 1)


NameError: name 'prednet' is not defined

In [None]:
prednet = PredNet(stack_sizes, R_stack_sizes,
                  A_filt_sizes, Ahat_filt_sizes, R_filt_sizes,
                  output_mode='error', return_sequences=True)