In [1]:
import torch
import numpy as np
import pandas as pd
import os
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image
from torchvision.transforms import ToTensor, RandomCrop

In [15]:
def video_loader(video_path, n_frames, input_frames):
    """Load a random run of consecutive frames from one extracted video.

    Frames are read from ``video_path`` as ``image_00001.jpg``,
    ``image_00002.jpg``, ... (1-based index, zero-padded to five digits).

    Args:
        video_path: Directory holding the frame images of a single video.
        n_frames: Number of frames available in ``video_path``.
        input_frames: Number of consecutive frames to load.

    Returns:
        A list of ``input_frames`` PIL images converted to RGB, in temporal
        order.

    Raises:
        ValueError: If the video has fewer than ``input_frames`` frames.
    """
    # Largest 1-based start index that still leaves room for a full clip.
    last_start = n_frames - input_frames + 1
    if last_start < 1:
        raise ValueError(
            'video {!r} has {} frames but {} are required'.format(
                video_path, n_frames, input_frames))

    # np.random.randint excludes its upper bound, so pass last_start + 1.
    # This makes the final frame reachable and lets a video with exactly
    # ``input_frames`` frames be loaded instead of raising "low >= high".
    start_frame = np.random.randint(1, last_start + 1)

    clip = []
    for i in range(start_frame, start_frame + input_frames):
        img_path = os.path.join(video_path, 'image_{:05d}.jpg'.format(i))
        # Image.open is lazy and keeps the file open. convert() decodes the
        # pixels into a new image, so the handle can be closed right away.
        # It also gives every frame the same (RGB) channel layout.
        with Image.open(img_path) as img:
            clip.append(img.convert('RGB'))
    return clip

In [16]:
class Kinetics(Dataset):
    """
    Dataset class for Kinetics

    Each CSV row describes one video. Columns are read by position:
    0 = directory of extracted frames, 1 = class label (returned as-is),
    2 = integer class id, 3 = number of frames in the directory.
    """

    def __init__(self, config, transform=None, mode='training'):
        """Read the video index for the requested split.

        Args:
            config: Object exposing ``train_csv``, ``test_csv`` and
                ``input_frames`` as attributes.
            transform: Callable applied to every frame of a clip. When
                ``None``, frames are converted with ``ToTensor()`` so they
                can still be stacked.
            mode: ``'test'`` reads ``config.test_csv``; any other value
                reads ``config.train_csv``.
        """
        super().__init__()

        self.config = config

        if mode == 'test':
            self.df = pd.read_csv(self.config.test_csv)
        else:
            self.df = pd.read_csv(self.config.train_csv)

        self.transform = transform

    def __len__(self):
        """Return the number of videos listed in the CSV."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return one randomly positioned clip and its label.

        Returns:
            dict with keys ``'clip'`` (stacked frame tensor with the frame
            axis moved to position 1), ``'class'`` (value of CSV column 1)
            and ``'cls_id'`` (0-dim int64 tensor built from CSV column 2).
        """
        video_path = self.df.iloc[idx, 0]
        cls = self.df.iloc[idx, 1]
        cls_id = torch.tensor(int(self.df.iloc[idx, 2])).long()
        n_frames = int(self.df.iloc[idx, 3])

        frames = video_loader(video_path, n_frames, self.config.input_frames)

        # torch.stack only accepts tensors, so the default ``transform=None``
        # used to crash here on PIL images; fall back to a plain conversion.
        frame_transform = self.transform if self.transform is not None else ToTensor()

        # NOTE(review): the transform runs once per frame, so a random
        # transform (e.g. RandomCrop) draws fresh parameters for each frame
        # of the same clip. Confirm that this is intended.
        frames = [frame_transform(frame) for frame in frames]

        # Stack along a new leading frame axis, then swap it with axis 1:
        # (T, C, H, W) -> (C, T, H, W), assuming each frame is (C, H, W).
        clip = torch.stack(frames, 0).permute(1, 0, 2, 3)

        sample = {
            'clip': clip,
            'class': cls,
            'cls_id': cls_id,
        }

        return sample

In [2]:
import yaml
from addict import Dict
from torch.utils.data import DataLoader
from utils.dataset import Kinetics
from torchvision import transforms
from torchvision.transforms import ToTensor, RandomCrop
import numpy as np

In [3]:
# Load the experiment settings. The context manager closes the file handle,
# which a bare open() call would leave for the garbage collector.
with open('./config.yaml') as config_file:
    CONFIG = Dict(yaml.safe_load(config_file))

In [4]:
# Kinetics dataset with `mode` left at its default. Each frame is randomly
# cropped to the configured (height, width) and then converted to a tensor.
train_data = Kinetics(
    CONFIG,
    transform=transforms.Compose(
        [
            RandomCrop(size=(CONFIG.height, CONFIG.width)),
            ToTensor(),
        ]
    ),
)

In [5]:
# Shuffled mini-batches over the dataset. Batch size and worker count come
# from the config, and the trailing incomplete batch is dropped.
train_loader = DataLoader(
    dataset=train_data,
    batch_size=CONFIG.batch_size,
    shuffle=True,
    num_workers=CONFIG.num_workers,
    drop_last=True,
)

In [6]:
# Pull a single batch to sanity-check the shape the pipeline produces.
# next(iter(...)) raises StopIteration on an empty loader. The previous
# for/break silently left `sample` and `x` undefined, or stale from an
# earlier run.
sample = next(iter(train_loader))
x = sample['clip']
print(x.shape)

torch.Size([8, 3, 16, 224, 224])


In [7]:
import torch

In [8]:
from torchvision.transforms import ToTensor, RandomCrop, Compose

In [10]:
from model.resnet import resnet18

In [11]:
# Display the sampled clip batch. This prints the full tensor repr; x.shape
# (shown in the earlier cell) is the compact check.
x

tensor([[[[[0.3490, 0.3490, 0.3490,  ..., 0.3373, 0.3451, 0.3490],
           [0.3490, 0.3490, 0.3490,  ..., 0.4039, 0.4157, 0.4196],
           [0.3490, 0.3490, 0.3490,  ..., 0.4784, 0.4863, 0.4902],
           ...,
           [0.6431, 0.6431, 0.6431,  ..., 0.6824, 0.6745, 0.6627],
           [0.6510, 0.6510, 0.6510,  ..., 0.6824, 0.6745, 0.6627],
           [0.6078, 0.6078, 0.6078,  ..., 0.7216, 0.7216, 0.7176]],

          [[0.3608, 0.3608, 0.3608,  ..., 0.9490, 0.9490, 0.9490],
           [0.3608, 0.3608, 0.3608,  ..., 0.9490, 0.9490, 0.9490],
           [0.3608, 0.3608, 0.3608,  ..., 0.9490, 0.9490, 0.9490],
           ...,
           [0.6157, 0.6157, 0.6157,  ..., 0.8588, 0.8549, 0.8471],
           [0.6235, 0.6235, 0.6235,  ..., 0.8627, 0.8275, 0.8196],
           [0.6235, 0.6235, 0.6235,  ..., 0.8549, 0.8275, 0.8196]],

          [[0.3608, 0.4471, 0.4784,  ..., 0.2431, 0.2431, 0.2431],
           [0.3725, 0.4627, 0.4627,  ..., 0.2431, 0.2431, 0.2431],
           [0.3843, 0.4706

In [14]:
# Size the network from the config values that shape the input clips, not
# from repeated 224 / 16 literals, so the model stays in sync with the data.
# sample_size is a single value, so this assumes square crops
# (CONFIG.height == CONFIG.width), as in the recorded batch shape.
model = resnet18(sample_size=CONFIG.height, sample_duration=CONFIG.input_frames)

  m.weight = nn.init.kaiming_normal(m.weight, mode='fan_out')


In [15]:
# Forward the sampled batch through the model as a smoke test.
y = model(x)

In [16]:
# Check the shape of the model output for the sampled batch.
y.shape

torch.Size([8, 400])