In [16]:
from transformers import TimesformerConfig, TimesformerModel, AutoImageProcessor
import torch
import os
import cv2
import random
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader
import torchvision

In [2]:
df = pd.read_csv("../data/mirror-data.csv")
df["Action"] = df["Action"].str.rstrip()
df = df[df.Action != "Talking&Yawning"]
df["label"] = df.Action.astype('category').cat.codes

In [3]:
i = 0
dfTrain = pd.DataFrame()
dfTest = pd.DataFrame()

while i<len(df):
    if i%5==0:
        dfTest = pd.concat([dfTest, df.iloc[[i]]])
    else :
        dfTrain = pd.concat([dfTrain, df.iloc[[i]]])
    i+=1

In [4]:
IMG_SIZE = 224
MAX_SEQ_LENGTH = 8
frame_step = 1

In [17]:
def frames_from_video_file(video_path, MAX_SEQ_LENGTH, frame_step, output_size = (IMG_SIZE, IMG_SIZE)):
    result = []
  
    reader  = torchvision.io.VideoReader(str(video_path))

    for i in range(MAX_SEQ_LENGTH):
        frame = next(reader)
        print(frame)
        result.append(frame['data'])

    return result

In [24]:
class VideoDataset(torch.utils.data.Dataset):
    def __init__(self, df, root_dir, MAX_SEQ_LENGTH, frame_step):
        self.video_paths = df["video-name"].values.tolist()
        self.labels = df["label"].values.tolist()
        self.n_frames = MAX_SEQ_LENGTH
        self.root_dir = root_dir
        self.frame_step = frame_step
        
    def __len__(self):
        return len(self.video_paths)
    
    def __getitem__(self, idx):
        path = self.video_paths[idx]
        label = self.labels[idx]
        frames = frames_from_video_file(os.path.join(self.root_dir, path), self.n_frames, self.frame_step)
        frames = torch.stack(frames)
        output = {
                'video': frames,
                'target': label
                }
        return output

In [25]:
train_ds = VideoDataset(dfTrain, "../data/YawDD/YawDD dataset/Mirror/all/", MAX_SEQ_LENGTH, frame_step)
test_ds = VideoDataset(dfTest, "../data/YawDD/YawDD dataset/Mirror/all/", MAX_SEQ_LENGTH, frame_step)

In [26]:
train_loader = DataLoader(train_ds, batch_size=4)
val_loader = DataLoader(test_ds, batch_size=4)

In [29]:
import imageio
import numpy as np
from IPython.display import Image

def unnormalize_img(img):
    """Un-normalizes the image pixels."""
    img = (img * 255).astype("uint8")
    return img.clip(0, 255)

def create_gif(video_tensor, filename="sample.gif"):
    """Prepares a GIF from a video tensor.
    
    The video tensor is expected to have the following shape:
    (num_frames, num_channels, height, width).
    """
    frames = []
    for video_frame in video_tensor:
        frame_unnormalized = unnormalize_img(video_frame.permute(1, 2, 0).numpy())
        frames.append(frame_unnormalized)
    kargs = {"duration": 0.25}
    imageio.mimsave(filename, frames, "GIF", **kargs)
    return filename

def display_gif(video_tensor, gif_name="sample.gif"):
    """Prepares and displays a GIF from a video tensor."""
    video_tensor = video_tensor.permute(1, 0, 2, 3)
    gif_filename = create_gif(video_tensor, gif_name)
    return Image(filename=gif_filename)

sample_video = train_ds.__getitem__(0)
video_tensor = sample_video["video"]
display_gif(video_tensor)

{'data': tensor([[[253, 253, 253,  ..., 215, 229, 241],
         [253, 253, 253,  ..., 243, 243, 245],
         [253, 253, 253,  ..., 251, 244, 239],
         ...,
         [133, 144, 150,  ..., 249, 246, 246],
         [126, 140, 150,  ..., 253, 254, 254],
         [123, 137, 150,  ..., 253, 254, 254]],

        [[253, 253, 253,  ..., 224, 240, 252],
         [253, 253, 253,  ..., 252, 254, 255],
         [253, 253, 253,  ..., 254, 249, 244],
         ...,
         [148, 159, 167,  ..., 254, 255, 255],
         [141, 155, 167,  ..., 253, 252, 252],
         [138, 152, 167,  ..., 253, 252, 252]],

        [[253, 253, 253,  ..., 235, 253, 255],
         [253, 253, 253,  ..., 255, 255, 255],
         [253, 253, 253,  ..., 255, 255, 254],
         ...,
         [162, 173, 173,  ..., 253, 253, 253],
         [155, 169, 173,  ..., 253, 253, 253],
         [152, 166, 173,  ..., 253, 253, 253]]], dtype=torch.uint8), 'pts': 0.0}
{'data': tensor([[[253, 253, 253,  ..., 220, 240, 255],
         