In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential
import cv2
import os.path
import pandas as pd
import numpy as np
from tensorflow.keras import utils
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten, Dropout, LSTM, TimeDistributed
from tensorflow.keras.applications import InceptionResNetV2
from sklearn.preprocessing import LabelEncoder
from tensorflow import keras

%load_ext tensorboard

In [None]:
# Spatial size (in pixels) that preprocess_image resizes every frame to by default.
PROCESSED_IMAGE_WIDTH = 128
PROCESSED_IMAGE_HEIGHT = 128
# Number of frames each clip is resampled to. Kept as a float because
# downsample_video divides the clip's frame count by it.
DOWNSAMPLING_FRAMES = 40.0

## Functions

In [None]:
def evaluate_dataset(path=r"D:\datasets\hmdb51_org"):
    """Collect every file below ``path`` together with its category label.

    The category of a file is the name of the directory that directly
    contains it.

    Args:
        path: Root directory of the dataset.

    Returns:
        A tuple ``(paths, categories)`` of two equally long lists: the full
        path of each file found and the name of its parent directory. Both
        lists are empty when ``path`` does not exist or contains no files.
    """
    paths = []
    categories = []
    # A dedicated loop variable keeps the ``path`` argument from being shadowed.
    for directory, _subdirs, files in os.walk(path):
        # os.path uses the platform's separator instead of a hard-coded "\\";
        # normpath drops a trailing separator so the root still gets a name.
        category = os.path.basename(os.path.normpath(directory))
        for filename in files:
            paths.append(os.path.join(directory, filename))
            categories.append(category)
    return paths, categories

In [None]:
def downsample_video(video, frames = DOWNSAMPLING_FRAMES):
    """Resample a clip to a fixed number of evenly spaced frames.

    Frames are picked at the truncated positions ``i * framecount / frames``
    for ``i = 0 .. frames - 1``. Clips shorter than ``frames`` therefore
    repeat frames, longer clips drop frames.

    Args:
        video: Array whose first axis indexes the frames of the clip.
        frames: Number of frames to return; truncated to an integer.

    Returns:
        Array with exactly ``int(frames)`` entries along the first axis.

    Raises:
        ValueError: If ``video`` contains no frames.
    """
    framecount = video.shape[0]
    if framecount == 0:
        raise ValueError("cannot downsample a video without frames")
    # linspace returns exactly the requested number of positions. arange with
    # a fractional step can produce one element too many due to rounding,
    # which would break the fixed clip length expected downstream.
    sampled_frames = np.linspace(0, framecount, num=int(frames), endpoint=False).astype(int)
    return video[sampled_frames]

In [None]:
def preprocess_image(image, width=PROCESSED_IMAGE_WIDTH, height=PROCESSED_IMAGE_HEIGHT, grayscale=True):
    """Resize a frame, optionally convert it to grayscale, and scale it by 1/255.

    Args:
        image: Frame to process. The grayscale conversion treats it as BGR.
        width: Target width passed to ``cv2.resize``.
        height: Target height passed to ``cv2.resize``.
        grayscale: When true, convert to a single channel and reshape to
            ``(height, width, 1)``.

    Returns:
        The processed frame as a ``float32`` array.
    """
    resized = cv2.resize(image, (width, height))
    if not grayscale:
        return (resized / 255.0).astype(np.float32)

    gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
    # Keras expects an explicit channel axis, even for a single channel.
    gray = gray.reshape((height, width, 1))
    return (gray / 255.0).astype(np.float32)

In [None]:
def get_formatted_video(path, grayscale=True):
    """Read a video file, preprocess every frame, and downsample the clip.

    Args:
        path: Path of the video file to open with ``cv2.VideoCapture``.
        grayscale: Forwarded to ``preprocess_image``.

    Returns:
        The result of ``downsample_video`` applied to the stacked,
        preprocessed frames.

    Raises:
        ValueError: If no frame could be read from ``path``.
    """
    cap = cv2.VideoCapture(path)
    video = []
    try:
        retval, image = cap.read()
        while retval:
            video.append(preprocess_image(image, grayscale=grayscale))
            retval, image = cap.read()
    finally:
        # Release the capture handle even when preprocessing a frame fails.
        cap.release()
    if not video:
        # Fail with the offending path instead of an obscure error further down.
        raise ValueError("no frames could be read from video: {!r}".format(path))
    return downsample_video(np.array(video))

In [None]:
def inflate_target(y, batch_size=16):
    """Stack ``batch_size`` copies of the target ``y`` into one array.

    Args:
        y: Target value (scalar or array-like) to repeat.
        batch_size: Number of copies.

    Returns:
        ``np.ndarray`` whose first axis has length ``batch_size``.
    """
    repeated = [y] * batch_size
    return np.array(repeated)

In [None]:
def prepare_training_input(input_path, target, grayscale=True):
    """Load one video and repeat its target once per returned frame.

    Args:
        input_path: Path of the video, passed to ``get_formatted_video``.
        target: Target for the video, passed to ``inflate_target``.
        grayscale: Forwarded to ``get_formatted_video``.

    Returns:
        Tuple ``(frames, targets)`` where ``targets`` holds one copy of
        ``target`` per entry along the first axis of ``frames``.
    """
    frames = get_formatted_video(input_path, grayscale=grayscale)
    frame_count = frames.shape[0]
    return frames, inflate_target(target, frame_count)

In [None]:
def create_batch(X_paths, y, batch_size=16, grayscale=True):
    """Yield ``(videos, targets)`` batches for the given video paths.

    Videos are loaded lazily with ``get_formatted_video`` as each batch is
    requested. The last batch is smaller when the number of paths is not a
    multiple of ``batch_size``. The generator is exhausted after one pass.

    Args:
        X_paths: Sequence of video file paths.
        y: Targets, indexable in parallel with ``X_paths``.
        batch_size: Maximum number of videos per batch.
        grayscale: Forwarded to ``get_formatted_video``.

    Yields:
        Tuple of the stacked videos and the vertically stacked targets.
    """
    for start in range(0, len(X_paths), batch_size):
        stop = min(start + batch_size, len(X_paths))
        # Load video and target pairwise so both stay aligned by index.
        pairs = [
            (get_formatted_video(X_paths[k], grayscale=grayscale), y[k])
            for k in range(start, stop)
        ]
        videos, targets = zip(*pairs)
        yield (np.array(videos), np.vstack(targets))

In [None]:
def create_model(num_classes=51):
    """Build and compile the CNN + LSTM classifier for grayscale clips.

    Five time-distributed Conv2D/MaxPooling2D stages process every frame,
    an LSTM consumes the flattened per-frame features, and two dense layers
    feed the softmax output.

    Args:
        num_classes: Number of units in the softmax output layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Clip shape: (frames, height, width, 1 grayscale channel), taken from the
    # same constants the preprocessing uses so both stay in sync.
    clip_shape = (int(DOWNSAMPLING_FRAMES), PROCESSED_IMAGE_HEIGHT, PROCESSED_IMAGE_WIDTH, 1)
    model = Sequential()

    # 2D CNN with time distribution
    model.add(TimeDistributed(Conv2D(32, kernel_size=(3, 3), activation="relu"),
                              input_shape=clip_shape))
    model.add(TimeDistributed(MaxPooling2D(pool_size=(2, 2))))
    for _ in range(4):
        model.add(TimeDistributed(Conv2D(32, kernel_size=(3, 3), activation="relu")))
        model.add(TimeDistributed(MaxPooling2D(pool_size=(2, 2))))
    model.add(TimeDistributed(Flatten()))

    # LSTM
    model.add(LSTM(128))

    # Some Dense Layers
    model.add(Dense(64, activation="relu"))
    model.add(Dense(64, activation="relu"))

    # finalize
    model.add(Dense(num_classes, activation="softmax"))
    model.compile(
        optimizer='adam',
        loss="categorical_crossentropy",
        # Accuracy() checks exact equality between predictions and targets,
        # which softmax outputs almost never satisfy; CategoricalAccuracy
        # compares the argmax against the one-hot target instead.
        metrics=[keras.metrics.CategoricalAccuracy(name="accuracy"),
                 keras.metrics.TopKCategoricalAccuracy(5)])
    return model

create_model().summary()

In [None]:
def create_model_transfer(num_classes=51):
    """Build and compile the transfer-learning classifier for colour clips.

    A frozen InceptionResNetV2 backbone (without its classification top,
    with average pooling) is applied to every frame; an LSTM and two dense
    layers turn the per-frame features into class probabilities.

    Args:
        num_classes: Number of units in the softmax output layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Clip shape: (frames, height, width, 3 colour channels), taken from the
    # same constants the preprocessing uses so both stay in sync.
    clip_shape = (int(DOWNSAMPLING_FRAMES), PROCESSED_IMAGE_HEIGHT, PROCESSED_IMAGE_WIDTH, 3)

    # NOTE(review): the pretrained backbone presumably expects RGB input scaled
    # as done by its Keras preprocess_input helper, whereas preprocess_image
    # yields OpenCV (BGR) frames divided by 255 - confirm and adapt if needed.
    base_model = InceptionResNetV2(
        input_shape=clip_shape[1:],
        include_top=False,
        pooling="avg")
    # Freeze the backbone explicitly rather than via its position in
    # model.layers, which depends on how the Sequential model was assembled.
    base_model.trainable = False

    model = Sequential()
    model.add(Input(clip_shape))
    model.add(TimeDistributed(base_model))
    # LSTM stuff
    model.add(LSTM(128))
    model.add(Dense(64, activation="relu"))
    model.add(Dense(64, activation="relu"))
    model.add(Dense(num_classes, activation="softmax"))
    model.compile(
        optimizer='adam',
        loss="categorical_crossentropy",
        # Accuracy() checks exact equality between predictions and targets,
        # which softmax outputs almost never satisfy; CategoricalAccuracy
        # compares the argmax against the one-hot target instead.
        metrics=[keras.metrics.CategoricalAccuracy(name="accuracy"),
                 keras.metrics.TopKCategoricalAccuracy(5)])
    return model

create_model_transfer().summary()

## Train

In [None]:
# Raw string: "\d" and "\h" are not valid escape sequences in a plain literal.
X, y = evaluate_dataset(r"D:\datasets\hmdb51_org")

# Encode the category names as integers, one-hot encode them, then split
# paths and targets together with a fixed seed.
y_onehot = utils.to_categorical(LabelEncoder().fit_transform(y))
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, random_state=42)

In [None]:
# Build a fresh transfer-learning model. The commented line below is the
# alternative: load a model from the path "model" instead.
model = create_model_transfer()
# model = keras.models.load_model("model")

In [None]:
# batch_x, batch_y = next(create_batch(X_train, y_train, batch_size=8, grayscale=False))
# batch_x.shape
# model.predict(batch_x).shape

In [None]:
# keras.utils.plot_model(model, show_shapes=True)

In [None]:
import datetime

# Each run logs into its own timestamped sub-directory of logs/fit.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/fit/" + run_stamp
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,
)

In [None]:
# create_batch returns a one-shot generator, so every epoch gets a fresh one.
NUM_EPOCHS = 200
for epoch in range(NUM_EPOCHS):
    model.fit(create_batch(X_train, y_train, batch_size=8, grayscale=False),
              # Continue the epoch counter across calls; otherwise every call
              # reports itself (and logs to TensorBoard) as epoch 0.
              initial_epoch=epoch,
              epochs=epoch + 1,
              max_queue_size = 20,
              callbacks=[tensorboard_callback] )

In [None]:
# Open the TensorBoard UI for everything logged below logs/fit.
%tensorboard --logdir logs/fit --port 6006 --host localhost

In [None]:
# The transfer model takes 3-channel input and was trained with
# grayscale=False, so the evaluation batches must be built the same way.
model.evaluate(create_batch(X_test, y_test, batch_size=8, grayscale=False))

In [None]:
# Persist the model under the path "model", the same path the commented-out
# keras.models.load_model call in this notebook reads from.
model.save("model")

## Playground / Testing Ground