In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential
import cv2
import os.path
import pandas as pd
import numpy as np
from tensorflow.keras import utils
from sklearn.model_selection import train_test_split
from tensorflow import keras
import os
import shutil

In [None]:
# Frame geometry (pixels) that every decoded frame is resized to.
PROCESSED_IMAGE_WIDTH = 128
PROCESSED_IMAGE_HEIGHT = 128

# Default number of frames requested from downsample_video() per clip.
DOWNSAMPLING_FRAMES = 40.0

# When true, frames are converted from BGR to single-channel grayscale.
GRAYSCALE = True

## Function Definitions

In [None]:
def evaluate_dataset(path=r"D:\datasets\hmdb51_org"):
    """Index every file below ``path`` into a DataFrame.

    Walks the directory tree rooted at ``path`` and records one row per file
    found, using the name of the directory that directly contains the file as
    its category.

    Args:
        path: Root directory of the dataset.

    Returns:
        pandas.DataFrame with the columns ``path`` (directory joined with the
        file name), ``filename`` and ``category``. The frame has these columns
        even when no files are found.
    """
    records = []
    for dirpath, _dirnames, filenames in os.walk(path):
        # normpath drops a trailing separator so basename is never empty.
        category = os.path.basename(os.path.normpath(dirpath))
        for filename in filenames:
            records.append({
                # os.path.join keeps the paths valid on every platform.
                "path": os.path.join(dirpath, filename),
                "filename": filename,
                "category": category,
            })
    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # growing a frame row by row is quadratic.
    return pd.DataFrame(records, columns=["path", "filename", "category"])

In [None]:
def downsample_video(video, frames = DOWNSAMPLING_FRAMES):
    """Pick evenly spaced frames from ``video`` along its first axis.

    Sample ``i`` is taken from index ``floor(i * framecount / frames)``. When
    the video has fewer frames than requested, frames are repeated.

    Args:
        video: Array-like whose first axis enumerates the frames.
        frames: Number of frames to return; must be positive. A non-integer
            value is rounded up to the next whole number of samples.

    Returns:
        numpy.ndarray holding the selected frames, in order.

    Raises:
        ValueError: If ``video`` has no frames or ``frames`` is not positive.
    """
    video = np.asarray(video)
    framecount = video.shape[0]
    if framecount == 0:
        raise ValueError("cannot downsample a video that contains no frames")
    if frames <= 0:
        raise ValueError("frames must be positive, got " + str(frames))

    # np.arange with a float step does not guarantee the number of samples,
    # so the sample positions are derived from an integer counter instead.
    sample_count = int(np.ceil(frames))
    positions = np.arange(sample_count) * framecount / frames
    # Clamp so rounding can never index past the last frame.
    indices = np.minimum(np.floor(positions).astype(int), framecount - 1)
    return video[indices]

In [None]:
def preprocess_image(image, width=PROCESSED_IMAGE_WIDTH, height=PROCESSED_IMAGE_HEIGHT, grayscale=GRAYSCALE):
    """Resize a single frame and scale its pixel values by 1/255.

    Args:
        image: Frame to process; converted with ``cv2.COLOR_BGR2GRAY`` when
            ``grayscale`` is true, so it is expected to be a BGR image then.
        width: Target width in pixels.
        height: Target height in pixels.
        grayscale: Whether to convert the frame to grayscale before resizing.

    Returns:
        The resized frame divided by 255.0.
    """
    target_size = (width, height)  # cv2.resize expects (width, height)
    source = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if grayscale else image
    resized = cv2.resize(source, target_size)
    return resized / 255.0

In [None]:
def get_formatted_video(path, grayscale=GRAYSCALE):
    """Read a video file, preprocess every frame and downsample the clip.

    Args:
        path: Path of the video file to read.
        grayscale: Forwarded to ``preprocess_image``; defaults to the
            notebook-wide ``GRAYSCALE`` setting.

    Returns:
        The array returned by ``downsample_video`` for the preprocessed frames.

    Raises:
        ValueError: If no frame could be read from ``path``.
    """
    cap = cv2.VideoCapture(path)
    frames = []
    try:
        retval, image = cap.read()
        while retval:
            frames.append(preprocess_image(image, grayscale=grayscale))
            retval, image = cap.read()
    finally:
        # Release the capture even if preprocessing a frame fails.
        cap.release()

    video = np.array(frames)
    print(path + " / frames " + str(video.shape[0]))
    if video.shape[0] == 0:
        # Fail with the offending path rather than an unrelated error
        # further down in the downsampling step.
        raise ValueError("no frames could be read from " + str(path))
    return downsample_video(video)

In [None]:
def create_batch(X_paths, y, batch_size=16, grayscale=True):
    """Yield ``(videos, labels)`` batches built from video paths.

    Each video is loaded with ``get_formatted_video(path, grayscale=grayscale)``,
    so that function must accept the ``grayscale`` keyword. The last batch is
    shorter when ``len(X_paths)`` is not a multiple of ``batch_size``.

    Args:
        X_paths: Indexable collection of video paths.
        y: Indexable collection of labels, aligned with ``X_paths``.
        batch_size: Maximum number of videos per batch.
        grayscale: Forwarded to ``get_formatted_video``.

    Yields:
        Tuple ``(np.array(videos), np.vstack(labels))`` for each batch.
    """
    total = len(X_paths)
    for start in range(0, total, batch_size):
        stop = min(start + batch_size, total)
        videos, labels = [], []
        for k in range(start, stop):
            videos.append(get_formatted_video(X_paths[k], grayscale=grayscale))
            labels.append(y[k])

        yield (np.array(videos), np.vstack(labels))

In [None]:
def save_video(videoarray, filename):
    """Write a stack of frames to ``filename`` as a 10 fps MJPG video.

    Args:
        videoarray: Frames stacked along the first axis. A 3-D array
            (frames, height, width) is written as grayscale, a 4-D array
            (frames, height, width, channels) as color.
        filename: Path of the video file to create.

    Raises:
        ValueError: If ``videoarray`` is neither 3-D nor 4-D.
        IOError: If the video writer could not be opened for ``filename``.
    """
    videoarray = np.asarray(videoarray)
    if videoarray.ndim not in (3, 4):
        raise ValueError(
            "expected a 3-D or 4-D frame stack, got shape " + str(videoarray.shape))

    if np.issubdtype(videoarray.dtype, np.floating):
        # The writer needs 8-bit frames. Float input is assumed to be scaled
        # to [0, 1], as produced by preprocess_image() dividing by 255.0;
        # NOTE(review): confirm no caller passes floats in another range.
        videoarray = np.clip(np.rint(videoarray * 255.0), 0, 255).astype(np.uint8)

    height, width = videoarray.shape[1:3]
    is_color = videoarray.ndim == 4
    out = cv2.VideoWriter(filename, cv2.VideoWriter_fourcc('M','J','P','G'), 10, (width, height), is_color)
    try:
        if not out.isOpened():
            raise IOError("could not open video writer for " + str(filename))
        for frame in videoarray:
            out.write(frame)
    finally:
        # Always finalize the file, even when writing a frame fails.
        out.release()

In [None]:
def convert_to_path(dataset, directory):
    """Preprocess every video in ``dataset`` and save it below ``directory``.

    Each video is written to ``<directory>/<category>/<filename>``; category
    directories are created as needed.

    Args:
        dataset: DataFrame with the columns ``path``, ``filename`` and
            ``category``.
        directory: Output root directory, with or without a trailing separator.
    """
    for _, row in dataset.iterrows():
        # os.path.join works whether or not `directory` ends in a separator.
        category_dir = os.path.join(directory, str(row["category"]))
        os.makedirs(category_dir, exist_ok=True)
        vid = get_formatted_video(row["path"])
        save_video(vid, os.path.join(category_dir, row["filename"]))

## Download Dataset

Requires the `wget` and `7z` (7-Zip) command-line tools.

In [None]:
# download the HMDB51 archive (hmdb51_org.rar) into the current working directory
!wget https://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/hmdb51_org.rar

In [None]:
# extract
!7z e hmdb51_org.rar -ohmdb51 -r &&

In [None]:
# extract nested archives
!cd temp && 7z x *.rar -o../hmdb51

In [None]:
# cleanup: delete the temp/ working directory and the downloaded archive
shutil.rmtree("temp")
os.remove("hmdb51_org.rar")

## Preprocess dataset

In [None]:
# Index the extracted dataset, split it reproducibly, then write the
# preprocessed clips of each split into its own directory.
dataset = evaluate_dataset("hmdb51")
train, test = train_test_split(dataset, random_state=42)

for split_frame, target_dir in ((train, "train/"), (test, "test/")):
    convert_to_path(split_frame, target_dir)