In [None]:
# Mount Google Drive so the files under /content/drive are reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import random
import pandas as pd
import numpy as np
import os
import cv2
import torchvision.models as models
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import math
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torchvision.models as models

from tqdm.auto import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import torchvision
import warnings
warnings.filterwarnings(action='ignore') 

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Global hyper-parameters read by the dataset, model-setup and training cells.
CFG = {
    'FPS':30,  # number of frames read from each video (see CustomDataset.get_video)
    'IMG_SIZE':128,  # every frame is resized to IMG_SIZE x IMG_SIZE
    'EPOCHS':25,  # upper bound on the number of training epochs
    'LEARNING_RATE':4.6875e-06,  # initial learning rate handed to Adam
    #'LEARNING_RATE':3e-4,  # alternative value kept for reference
    'BATCH_SIZE':16,  # batch size of every DataLoader
    'SEED':42,  # seed for seed_everything and train_test_split
    'BASE_DIM': 8,  # NOTE(review): not referenced anywhere in the visible notebook
    'NUM_CLASSES':5,  # number of classes the classifier predicts
}

In [None]:
def seed_everything(seed):
    """Seed every random number generator the notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices)
    and puts cuDNN into deterministic mode.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. worker
    # processes); the hash seed of the running kernel is already fixed.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may pick a different algorithm from run to run, so it
    # must stay off for results to be reproducible.
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED']) # Fix the seed for reproducibility

In [None]:
# Root folder of the competition data on the mounted Drive.
path = '/content/drive/MyDrive/video_dete'
# Video folders. NOTE(review): train_mp4 / test_mp4 are not referenced by any later cell shown here.
train_mp4 = f'{path}/train/'
test_mp4 = f'{path}/test/'
# Metadata tables; the train.csv preview shows the columns id, path and label.
train_df = pd.read_csv(path+'/train.csv')
test_df = pd.read_csv(path+'/test.csv')

In [None]:
# Show the training metadata table.
train_df

Unnamed: 0,id,path,label
0,TRAIN_000,./train/TRAIN_000.mp4,3
1,TRAIN_001,./train/TRAIN_001.mp4,0
2,TRAIN_002,./train/TRAIN_002.mp4,1
3,TRAIN_003,./train/TRAIN_003.mp4,4
4,TRAIN_004,./train/TRAIN_004.mp4,4
...,...,...,...
605,TRAIN_605,./train/TRAIN_605.mp4,0
606,TRAIN_606,./train/TRAIN_606.mp4,2
607,TRAIN_607,./train/TRAIN_607.mp4,1
608,TRAIN_608,./train/TRAIN_608.mp4,4


In [None]:
# All video paths except the first row.
# NOTE(review): `p` is not used by any later cell shown here; it looks like leftover scratch work.
p = train_df['path'][1:]

In [None]:
# Hold out 30% of the rows for validation. Only the frame itself needs to be
# split: the label column travels with each row, and the shuffled indices
# depend only on the row count and the seed.
train, val = train_test_split(
    train_df,
    test_size=0.3,
    random_state=CFG['SEED'],
)

In [None]:
# Show the rows assigned to the training split.
train

Unnamed: 0,id,path,label
412,TRAIN_412,./train/TRAIN_412.mp4,2
167,TRAIN_167,./train/TRAIN_167.mp4,3
54,TRAIN_054,./train/TRAIN_054.mp4,0
564,TRAIN_564,./train/TRAIN_564.mp4,0
46,TRAIN_046,./train/TRAIN_046.mp4,2
...,...,...,...
71,TRAIN_071,./train/TRAIN_071.mp4,1
106,TRAIN_106,./train/TRAIN_106.mp4,1
270,TRAIN_270,./train/TRAIN_270.mp4,2
435,TRAIN_435,./train/TRAIN_435.mp4,4


In [None]:
# Show the rows assigned to the validation split.
val

Unnamed: 0,id,path,label
81,TRAIN_081,./train/TRAIN_081.mp4,2
218,TRAIN_218,./train/TRAIN_218.mp4,3
55,TRAIN_055,./train/TRAIN_055.mp4,1
598,TRAIN_598,./train/TRAIN_598.mp4,3
264,TRAIN_264,./train/TRAIN_264.mp4,2
...,...,...,...
550,TRAIN_550,./train/TRAIN_550.mp4,1
370,TRAIN_370,./train/TRAIN_370.mp4,0
259,TRAIN_259,./train/TRAIN_259.mp4,3
301,TRAIN_301,./train/TRAIN_301.mp4,2


In [None]:
# Columns used downstream: path (video file) and label (action class)


In [None]:
class CustomDataset(Dataset):
    """Dataset that decodes a fixed number of frames from each video file.

    Args:
        video_path_list: Sequence of video paths as stored in the CSV files
            (e.g. ``./train/TRAIN_000.mp4``). The first character of each
            path is dropped and the remainder is appended to ``root_dir``.
        label_list: Sequence of labels aligned index-by-index with
            ``video_path_list``, or ``None`` for unlabeled data.
        root_dir: Directory the CSV paths are relative to. Defaults to the
            Drive folder this notebook was written against.
    """

    def __init__(self, video_path_list, label_list, root_dir='/content/drive/MyDrive/video_dete'):
        self.video_path_list = video_path_list
        self.label_list = label_list
        self.root_dir = root_dir

    def __getitem__(self, index):
        """Return ``(frames, label)``, or only ``frames`` when there are no labels."""
        frames = self.get_video(self.video_path_list[index])

        if self.label_list is None:
            return frames
        return frames, self.label_list[index]

    def __len__(self):
        """Number of videos in the dataset."""
        return len(self.video_path_list)

    def get_video(self, path):
        """Decode ``CFG['FPS']`` frames of one video into a float tensor.

        Each frame is resized to ``CFG['IMG_SIZE']`` square and scaled by
        1/255. Clips that end early are padded with all-zero frames so every
        item has the same length.

        Args:
            path: Path as stored in the CSV; its first character is dropped
                before it is appended to ``root_dir``.

        Returns:
            ``torch.FloatTensor`` with the stacked frames permuted so that the
            last axis of each frame comes first, i.e. (C, T, H, W) assuming
            OpenCV yields H x W x C frames.

        Raises:
            RuntimeError: If not a single frame could be decoded.
        """
        num_frames = CFG['FPS']
        size = CFG['IMG_SIZE']
        video_file = self.root_dir + path[1:]

        frames = []
        cap = cv2.VideoCapture(video_file)
        try:
            for _ in range(num_frames):
                ok, img = cap.read()
                if not ok or img is None:
                    # End of stream or decode failure: stop and pad below.
                    break
                img = cv2.resize(img, (size, size))
                frames.append(img / 255.)
        finally:
            # Always free the decoder handle, even if resizing raised.
            cap.release()

        if not frames:
            raise RuntimeError(f'Could not decode any frame from {video_file}')

        missing = num_frames - len(frames)
        if missing > 0:
            frames.extend([np.zeros_like(frames[0])] * missing)

        return torch.FloatTensor(np.array(frames)).permute(3, 0, 1, 2)

In [None]:
# Training split: reshuffled every epoch.
train_dataset = CustomDataset(train['path'].values, train['label'].values)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

# Validation split: paths and labels are looked up by the same index, so both
# columns must be passed whole. Slicing only the paths (as `.values[1:]` did)
# pairs every video with the label of the previous row.
val_dataset = CustomDataset(val['path'].values, val['label'].values)
val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [None]:
class BaseModel(nn.Module):
    """Four-stage 3-D CNN followed by a single linear classifier.

    Every stage is Conv3d -> BatchNorm3d -> ReLU -> MaxPool3d. Dropout layers
    (intended to limit overfitting) are not part of the network. The
    classifier expects 512 flattened features, which is what a
    3 x 30 x 128 x 128 clip produces (128 channels x 1 x 2 x 2).

    Args:
        num_classes: Size of the output layer.
    """

    def __init__(self, num_classes=5):
        super().__init__()
        # (in_channels, out_channels, cubic kernel size, pooling window)
        stage_specs = [
            (3, 8, 3, 2),
            (8, 32, 2, 2),
            (32, 64, 2, 2),
            (64, 128, 2, (1, 7, 7)),
        ]
        layers = []
        for c_in, c_out, kernel, pool in stage_specs:
            layers.append(nn.Conv3d(c_in, c_out, (kernel, kernel, kernel)))
            layers.append(nn.BatchNorm3d(c_out))
            layers.append(nn.ReLU())
            layers.append(nn.MaxPool3d(pool))
        self.feature_extract = nn.Sequential(*layers)
        self.classifier = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes)."""
        features = self.feature_extract(x)
        flat = features.view(x.size(0), -1)
        return self.classifier(flat)

In [None]:
def conv_1(in_dim, out_dim):
    """Build a 1x1 convolution (stride 1) followed by ReLU."""
    return nn.Sequential(
        nn.Conv2d(in_dim, out_dim, kernel_size=1, stride=1),
        nn.ReLU(),
    )

def conv_1_3(in_dim, mid_dim, out_dim):
    """Build a 1x1 reduction followed by a padded 3x3 convolution, each with ReLU."""
    reduce = nn.Conv2d(in_dim, mid_dim, kernel_size=1, stride=1)
    spatial = nn.Conv2d(mid_dim, out_dim, kernel_size=3, stride=1, padding=1)
    return nn.Sequential(reduce, nn.ReLU(), spatial, nn.ReLU())

def conv_1_5(in_dim, mid_dim, out_dim):
    """Build a 1x1 reduction followed by a padded 5x5 convolution, each with ReLU."""
    reduce = nn.Conv2d(in_dim, mid_dim, kernel_size=1, stride=1)
    spatial = nn.Conv2d(mid_dim, out_dim, kernel_size=5, stride=1, padding=2)
    return nn.Sequential(reduce, nn.ReLU(), spatial, nn.ReLU())

def max_3_1(in_dim, out_dim):
    """Build a size-preserving 3x3 max-pool followed by a 1x1 convolution and ReLU."""
    pool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
    project = nn.Conv2d(in_dim, out_dim, kernel_size=1, stride=1)
    return nn.Sequential(pool, project, nn.ReLU())

In [None]:
class inception_module(nn.Module):
    """Inception-style block: four parallel 2-D branches concatenated on the channel axis.

    Args:
        in_dim: Channels of the input feature map.
        out_dim_1: Output channels of the 1x1 branch.
        mid_dim_3: Bottleneck channels of the 1x1 -> 3x3 branch.
        out_dim_3: Output channels of the 1x1 -> 3x3 branch.
        mid_dim_5: Bottleneck channels of the 1x1 -> 5x5 branch.
        out_dim_5: Output channels of the 1x1 -> 5x5 branch.
        pool: Output channels of the max-pool -> 1x1 branch.
    """

    def __init__(self, in_dim, out_dim_1, mid_dim_3, out_dim_3, mid_dim_5, out_dim_5, pool):
        super(inception_module, self).__init__()

        # Each branch must be assigned as a bare module. A trailing comma would
        # store a one-element tuple instead, which is neither registered as a
        # submodule nor callable in forward().
        self.conv_1 = conv_1(in_dim, out_dim_1)
        self.conv_1_3 = conv_1_3(in_dim, mid_dim_3, out_dim_3)
        self.conv_1_5 = conv_1_5(in_dim, mid_dim_5, out_dim_5)
        self.max_3_1 = max_3_1(in_dim, pool)

    def forward(self, x):
        """Run all branches on ``x`` and concatenate their outputs along dim 1."""
        out_1 = self.conv_1(x)
        out_2 = self.conv_1_3(x)
        out_3 = self.conv_1_5(x)
        out_4 = self.max_3_1(x)
        return torch.cat([out_1, out_2, out_3, out_4], 1)

3D-CNN

In [None]:
class Base3DCNN(nn.Module):
    """Size-preserving 3-D convolution block: Conv3d(3x3x3, pad 1) -> BatchNorm3d -> ReLU.

    Args:
        in_channels: Channels of the input volume.
        out_channels: Channels produced by the convolution.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Conv3d(
            in_channels,
            out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
        )
        self.bn = nn.BatchNorm3d(out_channels)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply convolution, batch normalization and ReLU, in that order."""
        return self.relu(self.bn(self.conv(x)))

In [None]:
class Base3DCNN_2(nn.Module):
    """Size-preserving 3-D convolution block: Conv3d(3x3x3, pad 1) -> ReLU -> BatchNorm3d.

    Unlike ``Base3DCNN``, batch normalization is applied after the activation.

    Args:
        in_channels: Channels of the input volume.
        out_channels: Channels produced by the convolution.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Conv3d(
            in_channels,
            out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
        )
        self.relu = nn.ReLU()
        self.bn = nn.BatchNorm3d(out_channels)

    def forward(self, x):
        """Apply convolution, ReLU and batch normalization, in that order."""
        activated = self.relu(self.conv(x))
        return self.bn(activated)

In [None]:
class Simple3DCNN_2(nn.Module):
    """Four ``Base3DCNN_2`` stages, each halved by max-pooling, plus a linear head.

    The head expects 8192 flattened features, which is what a
    3 x 30 x 128 x 128 clip produces (128 channels x 1 x 8 x 8).

    Args:
        in_channels: Channels of the input clip.
        num_classes: Number of logits returned per sample.
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()

        self.layer1 = nn.Sequential(
            Base3DCNN_2(in_channels, 16),
            nn.MaxPool3d(kernel_size=2, stride=2)
        )
        self.layer2 = nn.Sequential(
            Base3DCNN_2(16, 32),
            nn.MaxPool3d(kernel_size=2, stride=2)
        )
        self.layer3 = nn.Sequential(
            Base3DCNN_2(32, 64),
            nn.MaxPool3d(kernel_size=2, stride=2)
        )
        self.layer4 = nn.Sequential(
            Base3DCNN_2(64, 128),
            nn.MaxPool3d(kernel_size=2, stride=2)
        )
        # The head returns raw logits: a ReLU here would clamp negative logits
        # to zero before the cross-entropy loss and block their gradients.
        # It stays a one-element Sequential so the parameter names remain
        # ``dense1.0.*`` and previously saved checkpoints still load.
        self.dense1 = nn.Sequential(
            nn.Linear(8192, num_classes),
        )
        # Not used by forward(). ``fc`` owns parameters, so it is kept to stay
        # compatible with existing state dicts.
        self.avgpool = nn.AvgPool3d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout3d(0.3)
        self.fc = nn.Linear(43200, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes)."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = x.view(x.size(0), -1)
        return self.dense1(x)

In [None]:
class Simple3DCNN(nn.Module):
    """Two ``Base3DCNN`` stages with overlapping max-pooling and a linear head.

    The head expects 43200 flattened features (32 channels x 6 x 15 x 15),
    which matches a 30-frame 64 x 64 clip.
    NOTE(review): with CFG['IMG_SIZE'] = 128 the flattened size is different,
    so this model presumably targets a smaller image size - confirm before use.

    Args:
        in_channels: Channels of the input clip.
        num_classes: Number of logits returned per sample.
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()

        def make_stage(c_in, c_out):
            # Convolution block followed by a 3x3x3 max-pool with stride 2.
            return nn.Sequential(
                Base3DCNN(c_in, c_out),
                nn.MaxPool3d(kernel_size=3, stride=2),
            )

        self.layer1 = make_stage(in_channels, 16)
        self.layer2 = make_stage(16, 32)
        # Defined but not applied in forward().
        self.dropout = nn.Dropout3d(0.5)
        self.fc = nn.Linear(43200, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes)."""
        features = self.layer2(self.layer1(x))
        flat = features.view(features.size(0), -1)
        return self.fc(flat)

In [None]:
def train2(model, optimizer, train_loader, val_loader, scheduler, device):
    """Train for ``CFG['EPOCHS']`` epochs without early stopping.

    After every epoch the model is validated, the current weights are written
    to ``{path}/model/cnn3d_2.h5`` and the scheduler (if any) is stepped with
    the validation F1 score.

    Args:
        model: Network to optimize; moved to ``device``.
        optimizer: Optimizer bound to ``model``'s parameters.
        train_loader: Iterable of ``(videos, labels)`` training batches.
        val_loader: Iterable of ``(videos, labels)`` validation batches.
        scheduler: Object whose ``step`` accepts the validation score, or ``None``.
        device: Device used for the model and the batches.

    Returns:
        Tuple ``(best_model, train_losses, val_losses, val_scores)``.
        ``best_model`` is an independent copy of the model taken at the epoch
        with the highest validation score (``None`` if no epoch scored above
        0). The three lists are the module-level ``__train_loss``,
        ``__val_loss`` and ``__val_score`` histories.
    """
    import copy  # local import: only needed for the best-model snapshot

    model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)

    # The plotting cell reads these lists by their module-level names, so they
    # are reused when present and created on demand on a fresh kernel.
    module_vars = globals()
    train_history = module_vars.setdefault('__train_loss', [])
    val_loss_history = module_vars.setdefault('__val_loss', [])
    val_score_history = module_vars.setdefault('__val_score', [])

    checkpoint_path = f'{path}/model/cnn3d_2.h5'
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

    best_val_score = 0
    best_model = None

    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for videos, labels in tqdm(iter(train_loader)):
            videos = videos.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            output = model(videos)
            loss = criterion(output, labels)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val F1 : [{_val_score:.5f}]')
        train_history.append(_train_loss)
        val_loss_history.append(_val_loss)
        val_score_history.append(_val_score)

        if scheduler is not None:
            scheduler.step(_val_score)

        if best_val_score < _val_score:
            best_val_score = _val_score
            # Snapshot the weights: keeping a plain reference would silently
            # follow every later optimizer step.
            best_model = copy.deepcopy(model)
        # The checkpoint on disk always holds the most recent epoch.
        torch.save(model.state_dict(), checkpoint_path)
    print("Model saved")

    return best_model, train_history, val_loss_history, val_score_history

In [None]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    """Train with early stopping on the validation F1 score.

    After every epoch the model is validated and the scheduler (if any) is
    stepped with the validation score. Whenever the score improves, the
    weights are written to ``{path}/model/cnn3d_2.h5``. Training stops after
    5 consecutive epochs without improvement or after ``CFG['EPOCHS']`` epochs.

    Args:
        model: Network to optimize; moved to ``device``.
        optimizer: Optimizer bound to ``model``'s parameters.
        train_loader: Iterable of ``(videos, labels)`` training batches.
        val_loader: Iterable of ``(videos, labels)`` validation batches.
        scheduler: Object whose ``step`` accepts the validation score, or ``None``.
        device: Device used for the model and the batches.

    Returns:
        Tuple ``(best_model, train_losses, val_losses, val_scores)``.
        ``best_model`` is an independent copy of the model taken at the epoch
        with the highest validation score (``None`` if no epoch scored above
        0). The three lists are the module-level ``__train_loss``,
        ``__val_loss`` and ``__val_score`` histories.
    """
    import copy  # local import: only needed for the best-model snapshot

    model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)

    # The plotting cell reads these lists by their module-level names, so they
    # are reused when present and created on demand on a fresh kernel.
    module_vars = globals()
    train_history = module_vars.setdefault('__train_loss', [])
    val_loss_history = module_vars.setdefault('__val_loss', [])
    val_score_history = module_vars.setdefault('__val_score', [])

    checkpoint_path = f'{path}/model/cnn3d_2.h5'
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

    patience = 5   # epochs without improvement tolerated before stopping
    counter = 0    # consecutive epochs without improvement so far

    best_val_score = 0
    best_model = None

    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for videos, labels in tqdm(iter(train_loader)):
            videos = videos.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            output = model(videos)
            loss = criterion(output, labels)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val F1 : [{_val_score:.5f}]')
        train_history.append(_train_loss)
        val_loss_history.append(_val_loss)
        val_score_history.append(_val_score)

        if scheduler is not None:
            scheduler.step(_val_score)

        if best_val_score < _val_score:
            best_val_score = _val_score
            # Snapshot the weights: keeping a plain reference would silently
            # follow the epochs trained before early stopping kicks in.
            best_model = copy.deepcopy(model)
            torch.save(model.state_dict(), checkpoint_path)
            print("Model saved")
            counter = 0
        else:
            counter += 1

        if counter >= patience:
            print("train Stop")
            break

    return best_model, train_history, val_loss_history, val_score_history

In [None]:
def validation(model, criterion, val_loader, device):
    """Evaluate ``model`` on ``val_loader`` without tracking gradients.

    The model is switched to eval mode and left in that mode.

    Args:
        model: Network to evaluate.
        criterion: Loss called as ``criterion(logits, labels)``.
        val_loader: Iterable of ``(videos, labels)`` batches.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(mean batch loss, macro-averaged F1 score)``.
    """
    model.eval()
    batch_losses = []
    predicted, expected = [], []

    with torch.no_grad():
        for videos, labels in tqdm(iter(val_loader)):
            videos, labels = videos.to(device), labels.to(device)

            logit = model(videos)
            batch_losses.append(criterion(logit, labels).item())

            # argmax over dim 1 picks the highest-scoring class per sample.
            predicted.extend(logit.argmax(1).cpu().tolist())
            expected.extend(labels.cpu().tolist())

        mean_loss = np.mean(batch_losses)

    score = f1_score(expected, predicted, average='macro')
    return mean_loss, score

In [None]:
load_model = True      # resume Simple3DCNN_2 from the checkpoint written by train()
pretrained_md = False  # use torchvision's pretrained r3d_18 instead of Simple3DCNN_2

if pretrained_md and load_model:
    # The saved checkpoint belongs to Simple3DCNN_2, so the two switches are
    # mutually exclusive; fail early instead of leaving `model` undefined.
    raise ValueError('load_model and pretrained_md cannot both be True')
elif pretrained_md:
    model = models.video.r3d_18(pretrained=True)
    # Swap the stock classification head for one with NUM_CLASSES outputs.
    model.fc = nn.Linear(model.fc.in_features, CFG['NUM_CLASSES'])
    model = model.to(device)
else:
    model = Simple3DCNN_2(3, CFG['NUM_CLASSES']).to(device)
    #model = ResNet3D(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], CFG['NUM_CLASSES'])
    if load_model:
        # map_location lets a checkpoint saved on GPU load on a CPU-only runtime.
        model.load_state_dict(torch.load(f'{path}/model/cnn3d_2.h5', map_location=device))

optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"], weight_decay=0.0001)
# mode='max' because the scheduler is stepped with the validation F1 score.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2,threshold_mode='abs',min_lr=1e-8, verbose=True)

# Per-epoch histories; train() appends to these module-level lists by name.
__train_loss = []
__val_loss = []
__val_score = []

# NOTE: train() returns a tuple (best_model, train_loss, val_loss, val_score),
# so `infer_model` holds that whole tuple, not just the model.
infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/27 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.55775] Val Loss : [2.35791] Val F1 : [0.20258]
Model saved


  0%|          | 0/27 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.56293] Val Loss : [2.38570] Val F1 : [0.21110]
Model saved


  0%|          | 0/27 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.55387] Val Loss : [2.39505] Val F1 : [0.21110]


  0%|          | 0/27 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.60837] Val Loss : [2.45569] Val F1 : [0.20924]


  0%|          | 0/27 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.49082] Val Loss : [2.48440] Val F1 : [0.20909]
Epoch 00005: reducing learning rate of group 0 to 2.3437e-06.


  0%|          | 0/27 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.53483] Val Loss : [2.48269] Val F1 : [0.20403]


  0%|          | 0/27 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.52115] Val Loss : [2.50659] Val F1 : [0.20436]
train Stop


In [None]:
import matplotlib.pyplot as plt

# One point per completed epoch, taken from the histories filled by train().
fig, ax = plt.subplots()
ax.plot(__train_loss, label='Train loss')
ax.plot(__val_loss, label='Validation loss')
ax.plot(__val_score, label='Validation Score')
ax.set_xlabel('Epoch')
ax.set_title('Training history')
ax.legend()
plt.show()




In [None]:
# Test metadata (the same file was already read into `test_df` earlier).
test = pd.read_csv(path+'/test.csv')
# No labels for the test set, so the dataset yields frames only.
test_dataset = CustomDataset(test['path'].values, None)
# shuffle=False keeps predictions in the same order as the CSV rows.
test_loader = DataLoader(test_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [None]:
def inference(model, test_loader, device):
    """Predict a class index for every sample in ``test_loader``.

    The model is moved to ``device`` and switched to eval mode.

    Args:
        model: Network returning per-class scores of shape (batch, classes).
        test_loader: Iterable of unlabeled video batches.
        device: Device used for the model and the batches.

    Returns:
        List of predicted class indices, in loader order.
    """
    model.to(device)
    model.eval()
    predictions = []
    with torch.no_grad():
        for batch in tqdm(iter(test_loader)):
            scores = model(batch.to(device))
            # argmax over dim 1 picks the highest-scoring class per sample.
            predictions.extend(scores.argmax(1).cpu().tolist())
    return predictions

In [None]:
# train() returns (best_model, train_loss, val_loss, val_score). Predict with
# the best-scoring model; fall back to `model` when no epoch improved the score.
best_model = infer_model[0] if infer_model[0] is not None else model
preds = inference(best_model, test_loader, device)

  0%|          | 0/10 [00:00<?, ?it/s]

In [None]:
# Load the submission template whose `label` column is filled in below.
submit = pd.read_csv(path+'/sample_submission.csv')

In [None]:
# Assign predictions positionally; this relies on the test loader not shuffling.
# NOTE(review): assumes sample_submission.csv lists ids in the same order as test.csv - confirm.
submit['label'] = preds
submit.head()

Unnamed: 0,id,label
0,TEST_000,0
1,TEST_001,3
2,TEST_002,0
3,TEST_003,2
4,TEST_004,3


In [None]:
# Write the submission into the runtime's working directory, without the index column.
submit.to_csv('./baseline_submit.csv', index=False)

In [None]:
# Download the submission file written above from the Colab runtime.
from google.colab import files
files.download('baseline_submit.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>