In [2]:
import random
import pandas as pd
import numpy as np
import os
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
 
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torchvision.models as models

from tqdm.auto import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

import warnings
warnings.filterwarnings(action = 'ignore')

In [3]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

#### Hyperparameter Setting

In [5]:
# Experiment-wide settings read by the dataset, the model summary and the training loop.
CFG = dict(
    FPS = 30,              # number of frames decoded from the start of every video
    IMG_SIZE = 128,        # each frame is resized to IMG_SIZE x IMG_SIZE
    EPOCHS = 10,           # passes over the training loader
    LEARNING_RATE = 3e-4,  # initial learning rate handed to the optimizer
    BATCH_SIZE = 4,        # videos per batch for every DataLoader
    SEED = 41,             # seed for the RNGs and the train/validation split
)

#### Fix Random Seed

In [7]:
def seed_everything(seed) :
    """Seed every random number generator this notebook relies on.

    Seeds Python's `random`, NumPy's global RNG and PyTorch (CPU and all CUDA
    devices), exports PYTHONHASHSEED, and puts cuDNN into deterministic mode.

    Args:
        seed: integer seed shared by all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seeds every visible GPU; silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels from run to run, which
    # defeats the deterministic flag above, so it must stay disabled.
    torch.backends.cudnn.benchmark = False

# Seed all RNGs once, before the data split and model construction further down.
seed_everything(CFG['SEED'])

#### Data Load

In [8]:
# Load the training index; the preview below shows `id`, `path` and `label` columns.
df = pd.read_csv('./train.csv')

In [9]:
# Display the training table.
df

Unnamed: 0,id,path,label
0,TRAIN_000,./train/TRAIN_000.mp4,3
1,TRAIN_001,./train/TRAIN_001.mp4,0
2,TRAIN_002,./train/TRAIN_002.mp4,1
3,TRAIN_003,./train/TRAIN_003.mp4,4
4,TRAIN_004,./train/TRAIN_004.mp4,4
...,...,...,...
605,TRAIN_605,./train/TRAIN_605.mp4,0
606,TRAIN_606,./train/TRAIN_606.mp4,2
607,TRAIN_607,./train/TRAIN_607.mp4,1
608,TRAIN_608,./train/TRAIN_608.mp4,4


#### Train Valid Split

In [71]:
# Hold out 20% of the rows for validation. Only the frame itself is split: the
# label column travels inside each resulting frame, so no separate label split
# is requested.
# NOTE: the name `train` is rebound to the training function in a later cell,
# so the loaders must be built from this frame before that cell is executed.
train, val = train_test_split(df, test_size = 0.2, random_state = CFG['SEED'])

In [72]:
# Display the validation split.
val

Unnamed: 0,id,path,label
515,TRAIN_515,./train/TRAIN_515.mp4,3
190,TRAIN_190,./train/TRAIN_190.mp4,0
327,TRAIN_327,./train/TRAIN_327.mp4,1
325,TRAIN_325,./train/TRAIN_325.mp4,0
459,TRAIN_459,./train/TRAIN_459.mp4,4
...,...,...,...
226,TRAIN_226,./train/TRAIN_226.mp4,2
377,TRAIN_377,./train/TRAIN_377.mp4,0
498,TRAIN_498,./train/TRAIN_498.mp4,0
553,TRAIN_553,./train/TRAIN_553.mp4,4


#### CustomDataset

In [21]:
class CustomDataset(Dataset) :
    """Dataset that decodes the first CFG['FPS'] frames of each video file.

    Args:
        video_path_list: indexable collection of video file paths.
        label_list: indexable collection of labels aligned with
            `video_path_list`, or None for unlabeled data.
    """
    def __init__(self, video_path_list, label_list) :
        self.video_path_list = video_path_list
        self.label_list = label_list

    def __getitem__(self, index) :
        """Return `(frames, label)` when labels are available, else `frames` only."""
        frames = self.get_video(self.video_path_list[index])

        if self.label_list is not None :
            label = self.label_list[index]
            return frames, label
        else :
            return frames

    def __len__(self) :
        return len(self.video_path_list)

    def get_video(self, path) :
        """Decode, resize and scale the leading frames of one video.

        Reads CFG['FPS'] frames from the start of `path`, resizes each one to
        CFG['IMG_SIZE'] x CFG['IMG_SIZE'] and divides pixel values by 255.
        The channel order delivered by OpenCV is kept as is (presumably BGR,
        OpenCV's default -- verify if a pretrained RGB model is ever used).

        Returns:
            FloatTensor laid out as (channels, frames, height, width).

        Raises:
            RuntimeError: if the file cannot be decoded or holds fewer than
                CFG['FPS'] frames.
        """
        n_frames = CFG['FPS']
        target_size = (CFG['IMG_SIZE'], CFG['IMG_SIZE'])
        frames = []
        cap = cv2.VideoCapture(path)
        try :
            for frame_idx in range(n_frames) :
                ok, img = cap.read()
                # A failed read yields (False, None); fail with the file name
                # instead of letting cv2.resize choke on None.
                if not ok or img is None :
                    raise RuntimeError(f'Could not read frame {frame_idx + 1} of {n_frames} from video : {path}')
                img = cv2.resize(img, target_size)
                img = img/255.
                frames.append(img)
        finally :
            # Always free the decoder handle, even when decoding fails.
            cap.release()
        # Stacked frames are (frames, height, width, channels); move channels first.
        return torch.FloatTensor(np.array(frames)).permute(3,0,1,2)


In [25]:

# Training split: batches are reshuffled on every pass.
train_dataset = CustomDataset(train['path'].values, train['label'].values)
train_loader = DataLoader(
    train_dataset,
    batch_size = CFG['BATCH_SIZE'],
    shuffle = True,
    num_workers = 0,
)

# Validation split: fixed order, same batch size.
val_dataset = CustomDataset(val['path'].values, val['label'].values)
val_loader = DataLoader(
    val_dataset,
    batch_size = CFG['BATCH_SIZE'],
    shuffle = False,
    num_workers = 0,
)

#### Define Model

In [31]:
class BaseModel(nn.Module) :
    """Small 3D-convolutional video classifier.

    Four Conv3d -> ReLU -> BatchNorm3d -> MaxPool3d stages followed by one
    linear layer. The linear layer expects 512 flattened features, which is
    what the extractor yields for clips shaped (3, 30, 128, 128): the last
    convolution outputs 128 x 1 x 14 x 14 and the final (1,7,7) pooling
    reduces that to 128 x 1 x 2 x 2.

    Args:
        num_classes: number of output logits.
    """
    def __init__(self, num_classes = 5) :
        super().__init__()

        # (in_channels, out_channels, conv kernel, pooling window) per stage
        stage_specs = [
            (3, 8, (3,3,3), 2),
            (8, 32, (2,2,2), 2),
            (32, 64, (2,2,2), 2),
            (64, 128, (2,2,2), (1,7,7)),
        ]

        layers = []
        for in_ch, out_ch, kernel, pool in stage_specs :
            layers += [
                nn.Conv3d(in_ch, out_ch, kernel),
                nn.ReLU(),
                nn.BatchNorm3d(out_ch),
                nn.MaxPool3d(pool),
            ]

        self.feature_extract = nn.Sequential(*layers)
        self.classifier = nn.Linear(512, num_classes)

    def forward(self, x) :
        """Map a batch of clips to class logits of shape (batch, num_classes)."""
        features = self.feature_extract(x)
        # Flatten everything except the batch dimension for the linear head.
        flat = features.view(x.shape[0], -1)
        return self.classifier(flat)

In [32]:
# Print a layer-by-layer summary for one clip shaped (3, FPS, IMG_SIZE, IMG_SIZE).
# This cell also binds the global name `model`.
from torchsummary import summary
model = BaseModel().to(device)
summary(model, input_size = ( 3, CFG['FPS'], CFG['IMG_SIZE'], CFG['IMG_SIZE']))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv3d-1      [-1, 8, 28, 126, 126]             656
              ReLU-2      [-1, 8, 28, 126, 126]               0
       BatchNorm3d-3      [-1, 8, 28, 126, 126]              16
         MaxPool3d-4        [-1, 8, 14, 63, 63]               0
            Conv3d-5       [-1, 32, 13, 62, 62]           2,080
              ReLU-6       [-1, 32, 13, 62, 62]               0
       BatchNorm3d-7       [-1, 32, 13, 62, 62]              64
         MaxPool3d-8        [-1, 32, 6, 31, 31]               0
            Conv3d-9        [-1, 64, 5, 30, 30]          16,448
             ReLU-10        [-1, 64, 5, 30, 30]               0
      BatchNorm3d-11        [-1, 64, 5, 30, 30]             128
        MaxPool3d-12        [-1, 64, 2, 15, 15]               0
           Conv3d-13       [-1, 128, 1, 14, 14]          65,664
             ReLU-14       [-1, 128, 1,

#### Train

In [51]:
def train(mode, optimizer, train_loader, val_loader, scheduler, device) :
    """Train a model and return it holding its best-validation weights.

    Runs CFG['EPOCHS'] epochs of cross-entropy training, validates after
    every epoch, and remembers the weights of the epoch with the highest
    validation macro F1. Those weights are loaded back into the model before
    it is returned.

    Args:
        mode: the nn.Module to train. The parameter name is kept for
            backward compatibility; it is the model, not a mode flag.
        optimizer: optimizer built over the model's parameters.
        train_loader: iterable of (videos, labels) training batches.
        val_loader: iterable of (videos, labels) validation batches.
        scheduler: optional scheduler stepped with the validation F1 after
            every epoch; pass None to disable.
        device: device the model and batches are moved to.

    Returns:
        The same model object, restored to its best-scoring weights, or None
        when no epoch was run.
    """
    import copy

    # Train the model that was passed in rather than whatever global happens
    # to be called `model`.
    model = mode
    model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)

    # Start below any possible F1 so the first epoch is always recorded.
    best_val_score = float('-inf')
    best_state = None

    for epoch in range(1, CFG['EPOCHS'] + 1 ) :
        model.train()
        train_loss = []
        for videos, labels in tqdm(iter(train_loader)) :
            videos = videos.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            output = model(videos)
            loss = criterion(output, labels)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val F1 : [{_val_score:.5f}]')

        if scheduler is not None :
            scheduler.step(_val_score)

        if best_val_score < _val_score :
            best_val_score = _val_score
            # Snapshot the weights: keeping a reference to the model would
            # keep tracking later epochs and lose the best checkpoint.
            best_state = copy.deepcopy(model.state_dict())

    if best_state is None :
        return None

    model.load_state_dict(best_state)
    return model




In [52]:
def validation(model, criterion, val_loader, device) :
    """Evaluate `model` on `val_loader` without tracking gradients.

    Args:
        model: network to evaluate; it is switched to eval mode.
        criterion: loss called as `criterion(logits, labels)`.
        val_loader: iterable of (videos, labels) batches.
        device: device each batch is moved to.

    Returns:
        Tuple `(mean of the per-batch losses, macro-averaged F1 score)`.
    """
    model.eval()
    batch_losses = []
    predicted, expected = [], []

    with torch.no_grad() :
        for clips, targets in tqdm(iter(val_loader)) :
            clips, targets = clips.to(device), targets.to(device)

            scores = model(clips)
            batch_losses.append(criterion(scores, targets).item())

            # The predicted class is the index of the largest logit.
            predicted.extend(scores.argmax(1).detach().cpu().numpy().tolist())
            expected.extend(targets.detach().cpu().numpy().tolist())

    mean_loss = np.mean(batch_losses)
    macro_f1 = f1_score(expected, predicted, average = 'macro')
    return mean_loss, macro_f1

In [53]:
# Start the real training run from a freshly initialised model.
model = BaseModel()
# train() switches the model to training mode at the start of every epoch.
model.eval()

# Adam over all model parameters, starting at the configured learning rate.
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG['LEARNING_RATE'])
# mode = 'max' because train() steps this scheduler with the validation F1 (higher is better).
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode = 'max', factor = 0.5, patience = 2, threshold_mode = 'abs', min_lr = 1e-8, verbose = True)

# The model returned here is the one used for test-set inference below.
infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)


100%|██████████| 122/122 [00:10<00:00, 11.28it/s]
100%|██████████| 31/31 [00:02<00:00, 14.62it/s]


Epoch [1], Train Loss : [1.36133] Val Loss : [0.96310] Val F1 : [0.62395]


100%|██████████| 122/122 [00:10<00:00, 11.21it/s]
100%|██████████| 31/31 [00:02<00:00, 14.97it/s]


Epoch [2], Train Loss : [0.64865] Val Loss : [0.73437] Val F1 : [0.68912]


100%|██████████| 122/122 [00:11<00:00, 10.30it/s]
100%|██████████| 31/31 [00:02<00:00, 14.63it/s]


Epoch [3], Train Loss : [0.44649] Val Loss : [0.72302] Val F1 : [0.71027]


100%|██████████| 122/122 [00:10<00:00, 11.38it/s]
100%|██████████| 31/31 [00:02<00:00, 15.17it/s]


Epoch [4], Train Loss : [0.25640] Val Loss : [0.64991] Val F1 : [0.74500]


100%|██████████| 122/122 [00:10<00:00, 11.34it/s]
100%|██████████| 31/31 [00:02<00:00, 15.05it/s]


Epoch [5], Train Loss : [0.19191] Val Loss : [0.57994] Val F1 : [0.77084]


100%|██████████| 122/122 [00:11<00:00, 11.08it/s]
100%|██████████| 31/31 [00:02<00:00, 14.62it/s]


Epoch [6], Train Loss : [0.13618] Val Loss : [0.61582] Val F1 : [0.80601]


100%|██████████| 122/122 [00:10<00:00, 11.22it/s]
100%|██████████| 31/31 [00:02<00:00, 14.47it/s]


Epoch [7], Train Loss : [0.10010] Val Loss : [0.67598] Val F1 : [0.76869]


100%|██████████| 122/122 [00:10<00:00, 11.10it/s]
100%|██████████| 31/31 [00:02<00:00, 14.82it/s]


Epoch [8], Train Loss : [0.04235] Val Loss : [0.58148] Val F1 : [0.81310]


100%|██████████| 122/122 [00:11<00:00, 11.09it/s]
100%|██████████| 31/31 [00:02<00:00, 15.12it/s]


Epoch [9], Train Loss : [0.03515] Val Loss : [0.64724] Val F1 : [0.75146]


100%|██████████| 122/122 [00:10<00:00, 11.37it/s]
100%|██████████| 31/31 [00:02<00:00, 14.77it/s]

Epoch [10], Train Loss : [0.07299] Val Loss : [0.90330] Val F1 : [0.71681]





#### test

In [54]:
# Load the test index; its `path` column is read when building the test dataset.
test = pd.read_csv('./test.csv')

In [56]:
# No labels for the test set: passing None makes the dataset yield frames only.
test_dataset = CustomDataset(test['path'].values, None)
# shuffle = False keeps predictions in the same order as the rows of `test`.
test_loader = DataLoader(test_dataset, batch_size = CFG['BATCH_SIZE'], shuffle = False, num_workers = 0)

In [59]:
def inference(model, test_loader, device) :
    """Predict a class index for every sample served by `test_loader`.

    Args:
        model: trained network; moved to `device` and switched to eval mode.
        test_loader: iterable of unlabeled video batches.
        device: device the model and each batch are moved to.

    Returns:
        List of predicted class indices, in loader order.
    """
    model.to(device)
    model.eval()

    predicted = []
    with torch.no_grad() :
        for clips in tqdm(iter(test_loader)) :
            scores = model(clips.to(device))
            # The predicted class is the index of the largest logit.
            batch_classes = scores.argmax(1).detach().cpu().numpy().tolist()
            predicted.extend(batch_classes)
    return predicted

In [60]:
# Predict a class for every test video with the model returned by train().
preds = inference(infer_model, test_loader, device)

100%|██████████| 39/39 [00:03<00:00, 11.94it/s]


In [63]:
# Load the submission template; the preview below shows `id` and `label` columns.
submit = pd.read_csv('./sample_submission.csv')

In [64]:
# Predictions are assigned by position.
# NOTE(review): assumes sample_submission.csv lists ids in the same order as test.csv -- confirm.
submit['label'] = preds
submit.head()

Unnamed: 0,id,label
0,TEST_000,1
1,TEST_001,3
2,TEST_002,0
3,TEST_003,2
4,TEST_004,4


In [65]:
# Write the filled template without the pandas index column.
submit.to_csv('./baseline_submit.csv', index = False)

In [6]:
# Scratch check: macro F1 is 0 when no prediction matches its label.
from sklearn.metrics import f1_score

f1_score([1,1,0], [0,0,1], average  = 'macro')

0.0

In [7]:
# Scratch check: unzip a list of pairs, giving a == (1, 2) and b == (0, 1).
a, b = zip(*[(1,0), (2,1)])

1.5