In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential
import cv2
import os.path
import pandas as pd
import numpy as np
from tensorflow.keras import utils
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten
from sklearn.preprocessing import LabelEncoder

In [None]:
# Size (in pixels) that every frame is resized to before it reaches the model.
# The original note gives the default frame shape as (240, 320, 3), so these
# values are half of that height and width.
PROCESSED_IMAGE_WIDTH = 160
PROCESSED_IMAGE_HEIGHT = 120

In [None]:
def evaluate_dataset(path):
    """Index every file below ``path`` into a DataFrame.

    The directory tree is walked recursively. Each file found becomes one row
    whose category is the name of the directory that directly contains it.

    Args:
        path: Root directory of the dataset.

    Returns:
        A ``pandas.DataFrame`` with the columns ``name`` (file name), ``path``
        (directory joined with the file name) and ``category`` (name of the
        containing directory). The columns are present even when no file is
        found, so attribute access such as ``dataset.path`` keeps working.
    """
    records = []
    # Walk the directory that was passed in; the loop variable is deliberately
    # not called `path` so that it cannot shadow the argument.
    for dirpath, _dirnames, filenames in os.walk(path):
        category = os.path.basename(dirpath)
        for filename in filenames:
            records.append(
                {
                    "name": filename,
                    "path": os.path.join(dirpath, filename),
                    "category": category,
                }
            )
    # Build the frame once from all records: DataFrame.append was removed in
    # pandas 2.0 and appending row by row is quadratic in the number of files.
    return pd.DataFrame(records, columns=["name", "path", "category"])

In [None]:
def preprocess_image(image, width=PROCESSED_IMAGE_WIDTH, height=PROCESSED_IMAGE_HEIGHT):
    """Turn one BGR frame into a single-channel, rescaled model input.

    Args:
        image: Frame to convert; it is passed to ``cv2.cvtColor`` with
            ``cv2.COLOR_BGR2GRAY``.
        width: Target width in pixels.
        height: Target height in pixels.

    Returns:
        Array of shape ``(height, width, 1)`` holding the grayscale pixel
        values divided by 255.0.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # cv2.resize takes the target size as (width, height)
    resized = cv2.resize(gray, (width, height))
    # add the trailing channel axis Keras expects, then rescale
    # NOTE(review): assumes 8-bit frames so the result lies in [0, 1] - confirm
    return resized.reshape((height, width, 1)) / 255.0

In [None]:
def create_model(input_width=PROCESSED_IMAGE_WIDTH, input_height=PROCESSED_IMAGE_HEIGHT, num_classes=51):
    """Build and compile the frame classifier.

    Args:
        input_width: Width of the single-channel input frames.
        input_height: Height of the single-channel input frames.
        num_classes: Number of units in the final softmax layer.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, categorical
        cross-entropy loss) taking inputs of shape
        ``(input_height, input_width, 1)``.
    """
    layers = [
        Input((input_height, input_width, 1)),
        # convolutional part (TODO: decide whether more conv layers are needed)
        Conv2D(32, kernel_size=(3, 3), activation="relu"),
        MaxPooling2D(pool_size=(2, 2)),
        # dense part (open question from the original: how many Dense layers?)
        Flatten(),
        Dense(64, activation="relu"),
        Dense(32, activation="relu"),
        # classification head
        Dense(num_classes, activation='softmax'),
    ]

    model = Sequential()
    for layer in layers:
        model.add(layer)

    model.compile(optimizer='adam', loss="categorical_crossentropy")
    return model

In [None]:
def get_formatted_video(path):
    """Decode a video file into an array of preprocessed frames.

    Frames are read one by one with ``cv2.VideoCapture`` until ``read()``
    reports failure, and each frame is passed through ``preprocess_image``.

    Args:
        path: Location of the video, passed to ``cv2.VideoCapture``.

    Returns:
        ``np.array`` of the preprocessed frames in reading order. If no frame
        could be read the result is an empty array of shape ``(0,)``.
    """
    cap = cv2.VideoCapture(path)
    frames = []
    try:
        retval, image = cap.read()
        while retval:
            frames.append(preprocess_image(image))
            retval, image = cap.read()
    finally:
        # Release the capture handle even if preprocessing a frame raises,
        # otherwise it would stay open for the rest of the kernel session.
        cap.release()
    return np.array(frames)

In [None]:
def train(model, X, y):
    """Fit ``model`` on all frames of a single video.

    Args:
        model: Object with a Keras-style ``fit(x=..., y=...)`` method.
        X: Video location, forwarded to ``get_formatted_video``.
        y: Label of the video; it is repeated once per frame so that every
            frame is trained against the same target.
    """
    frames = get_formatted_video(X)
    frame_count = frames.shape[0]
    # one copy of the video label for each frame
    labels = np.array([y] * frame_count)
    model.fit(x=frames, y=labels)

In [None]:
# Fixed seed so the train/test split is identical on every Restart & Run All.
RANDOM_SEED = 42

# Raw string: "\d" and "\h" are invalid escape sequences in a normal string
# literal. The resulting path value is unchanged.
dataset = evaluate_dataset(r"D:\datasets\hmdb51_org")

# Keep the fitted encoder around so integer class ids (and therefore model
# predictions) can be mapped back to category names with inverse_transform.
label_encoder = LabelEncoder()
labels_one_hot = utils.to_categorical(label_encoder.fit_transform(dataset.category))

# X_* hold video paths, y_* hold the matching one-hot encoded categories.
X_train, X_test, y_train, y_test = train_test_split(
    dataset.path,
    labels_one_hot,
    random_state=RANDOM_SEED,
)

In [None]:
# Build and compile the classifier using create_model's default input size
# and class count.
model = create_model()

In [None]:
# Train on one video at a time. Iterate over the values directly instead of
# indexing: `for i in len(...)` raises TypeError (an int is not iterable), and
# X_train is a pandas Series whose index was shuffled by train_test_split, so
# X_train[i] would be a label lookup while y_train[i] is positional. zip keeps
# every video path paired with its own label.
for video_path, video_label in zip(X_train, y_train):
    train(model, video_path, video_label)