Import

In [1]:
import random
import pandas as pd
import numpy as np
import os
import cv2
import torch
import gc
import torch.nn as nn
import torch.nn.functional as F
from tqdm.auto import tqdm
from sklearn.metrics import f1_score
from torchvision.models.video import r3d_18
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split, KFold

from google.colab import drive
drive.mount('/content/drive')


device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

Mounted at /content/drive


Hyper Parameter Setting

In [2]:
CFG = {
    'VIDEO_LENGTH':50, # 16프레임 * 5초
    'IMG_SIZE':128,
    'EPOCHS':10,
    #'LEARNING_RATE':3e-4,
    'LEARNING_RATE':1.5000e-06,
    'BATCH_SIZE':16,
    'SEED':41,
    'ACCUMULATION':60,
    'FOLD':5,
    'NUM_CLASSES':13
}

Fixed Random Seed

In [3]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

Load Data

In [4]:
#Data Path
path = '/content/drive/MyDrive/car_data'
df = pd.read_csv(f'{path}/train.csv')
test = pd.read_csv(f'{path}/test.csv')

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2698 entries, 0 to 2697
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   sample_id   2698 non-null   object
 1   video_path  2698 non-null   object
 2   label       2698 non-null   int64 
dtypes: int64(1), object(2)
memory usage: 63.4+ KB


Data ReLabeling

In [6]:
new_label = [[0,['No', None, None, None]], [1,['Yes', 'Yes', 'Normal', 'Day']], [2,['Yes', 'Yes', 'Normal', 'Night']],[ 3,['Yes', 'Yes', 'Snowy', 'Day']], [4,['Yes', 'Yes', 'Snowy', 'Night']], [5,['Yes', 'Yes', 'Rainy', 'Day']], [6,['Yes', 'Yes', 'Rainy', 'Night']], [7,['Yes','No', 'Normal', 'Day']], [8,['Yes','No', 'Normal', 'Night']], [9,['Yes','No', 'Snowy', 'Day']], [10,['Yes','No', 'Snowy', 'Night']], [11,['Yes','No', 'Rainy', 'Day']], [12,['Yes','No', 'Rainy', 'Night']]]

In [7]:
crash = []
ego = []
weather = []
timing = []
for idx in (df.index):
    id = df['label'][idx]
    crash.append(new_label[id][1][0])
    ego.append(new_label[id][1][1])
    weather.append(new_label[id][1][2])
    timing.append(new_label[id][1][3])

df['crash'] = crash
df['ego'] = ego
df['weather_'] = weather
df['timing_'] = timing

In [8]:

ego_crash = []
for idx in (df.index):
    if df['crash'][idx] == 'No':
        ego_crash.append(0)
    elif df['crash'][idx] == 'Yes' and df['ego'][idx] == 'Yes':
        ego_crash.append(1)
    elif df['crash'][idx] == 'Yes' and df['ego'][idx] == 'No':
        ego_crash.append(2)
df['ego+crash'] = ego_crash
weather = []
for idx in (df.index):
    if df['weather_'][idx] == 'Normal':
        weather.append(0)
    elif df['weather_'][idx] == 'Snowy':
        weather.append(1)
    elif df['weather_'][idx] == 'Rainy':
        weather.append(2)
    elif df['weather_'][idx] == None:
        weather.append(-1)
df['weather'] = weather

timing = []
for idx in (df.index):
    if df['timing_'][idx] == 'Day':
        timing.append(0)
    elif df['timing_'][idx] == 'Night':
        timing.append(1)
    elif df['timing_'][idx] == None:
        timing.append(-1)

df['timing'] = timing

In [9]:
df = df.drop(['crash', 'ego', 'weather_', 'timing_'], axis=1)

In [10]:
df

Unnamed: 0,sample_id,video_path,label,ego+crash,weather,timing
0,TRAIN_0000,./train/TRAIN_0000.mp4,7,2,0,0
1,TRAIN_0001,./train/TRAIN_0001.mp4,7,2,0,0
2,TRAIN_0002,./train/TRAIN_0002.mp4,0,0,-1,-1
3,TRAIN_0003,./train/TRAIN_0003.mp4,0,0,-1,-1
4,TRAIN_0004,./train/TRAIN_0004.mp4,1,1,0,0
...,...,...,...,...,...,...
2693,TRAIN_2693,./train/TRAIN_2693.mp4,3,1,1,0
2694,TRAIN_2694,./train/TRAIN_2694.mp4,5,1,2,0
2695,TRAIN_2695,./train/TRAIN_2695.mp4,0,0,-1,-1
2696,TRAIN_2696,./train/TRAIN_2696.mp4,0,0,-1,-1


Delete Error Data

In [11]:
error_data = [8, 124, 330, 387, 486, 1113, 1533, 2292]
for error in error_data:
    df.drop(index=error, inplace=True)

df = df.reset_index(drop=True)


In [14]:
# #EDA
# df_l = len(df)
# for i in tqdm(range(df_l)):
#     new_data = {'sample_id': f'TRAIN_{i+df_l:04}',
#                 'video_path': f'./train/train_aug/TRAIN_{i+df_l:04}.mp4',
#                 'label': df['label'][i],
#                 'crash_ego': df['ego+crash'][i],
#                 'weather_timing': df['weather_timing'][i],
#                }
#     df_new = df.append(new_data, ignore_index=True)

Unnamed: 0,sample_id,video_path,label,ego+crash,weather,timing
0,TRAIN_0000,./train/TRAIN_0000.mp4,7,2,0,0
1,TRAIN_0001,./train/TRAIN_0001.mp4,7,2,0,0
2,TRAIN_0002,./train/TRAIN_0002.mp4,0,0,-1,-1
3,TRAIN_0003,./train/TRAIN_0003.mp4,0,0,-1,-1
4,TRAIN_0004,./train/TRAIN_0004.mp4,1,1,0,0
...,...,...,...,...,...,...
2685,TRAIN_2693,./train/TRAIN_2693.mp4,3,1,1,0
2686,TRAIN_2694,./train/TRAIN_2694.mp4,5,1,2,0
2687,TRAIN_2695,./train/TRAIN_2695.mp4,0,0,-1,-1
2688,TRAIN_2696,./train/TRAIN_2696.mp4,0,0,-1,-1


In [12]:
from sklearn.utils import resample

In [24]:
# df_t = df.loc[:, ['sample_id', 'video_path', 'timing']]
# df_t = df_t[df_t['timing']>-1]
# df_t
# print(df_t["timing"].value_counts(dropna=False))
# df_t0 = df_t[df_t['timing'] == 0]
# df_t1 = df_t[df_t['timing'] == 1]
# #df_w2 = df_w[df_w['timing'] == 2]

# df_tdown = resample(df_t0, replace = False, n_samples=len(df_t1), random_state=42)
# df_t = pd.concat([df_tdown, df_t1])
# print(df_t["timing"].value_counts(dropna=False))


0    808
1    106
Name: timing, dtype: int64
0    106
1    106
Name: timing, dtype: int64


In [None]:

#crash_ego data
crash_df = df.drop(['weather', 'label', 'timing'], axis=1)
#weather data
weather_df = df.drop(['label', 'timing', 'ego+crash'], axis=1)
#timing data
timing_df = df.drop(['label', 'weather', 'ego+crash'], axis=1)

#weather data와 timing data는 label=0이 아닌 데이터도 준비
non_crash = df[df['label'] !=0]
weather_df = non_crash.drop(['label', 'timing', 'ego+crash'], axis=1)
weather_df.reset_index(drop=True)
timing_df = non_crash.drop(['label', 'weather', 'ego+crash'], axis=1)
timing_df.reset_index(drop=True)

Unnamed: 0,sample_id,video_path,timing
0,TRAIN_0000,./train/TRAIN_0000.mp4,0
1,TRAIN_0001,./train/TRAIN_0001.mp4,0
2,TRAIN_0004,./train/TRAIN_0004.mp4,0
3,TRAIN_0006,./train/TRAIN_0006.mp4,0
4,TRAIN_0007,./train/TRAIN_0007.mp4,0
...,...,...,...
909,TRAIN_2685,./train/TRAIN_2685.mp4,1
910,TRAIN_2689,./train/TRAIN_2689.mp4,0
911,TRAIN_2692,./train/TRAIN_2692.mp4,0
912,TRAIN_2693,./train/TRAIN_2693.mp4,0


fix datatype

In [None]:
crash_df.astype({'ego+crash':'int64'})
weather_df.astype({'weather':'int64'})
timing_df.astype({'timing':'int64'})

Unnamed: 0,sample_id,video_path,timing
0,TRAIN_0000,./train/TRAIN_0000.mp4,0
1,TRAIN_0001,./train/TRAIN_0001.mp4,0
4,TRAIN_0004,./train/TRAIN_0004.mp4,0
6,TRAIN_0006,./train/TRAIN_0006.mp4,0
7,TRAIN_0007,./train/TRAIN_0007.mp4,0
...,...,...,...
2677,TRAIN_2685,./train/TRAIN_2685.mp4,1
2681,TRAIN_2689,./train/TRAIN_2689.mp4,0
2684,TRAIN_2692,./train/TRAIN_2692.mp4,0
2685,TRAIN_2693,./train/TRAIN_2693.mp4,0


In [None]:
crash_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2690 entries, 0 to 2689
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   sample_id   2690 non-null   object
 1   video_path  2690 non-null   object
 2   ego+crash   2690 non-null   int64 
dtypes: int64(1), object(2)
memory usage: 63.2+ KB


Calculate_weight

In [None]:
from sklearn.utils.class_weight import compute_class_weight
crash_labels = [i for i in crash_df['ego+crash']]
crash_labels.sort()
crash_class_weights = compute_class_weight(class_weight = "balanced", classes=np.unique(crash_labels), y=crash_labels)
crash_class_weights = torch.FloatTensor(crash_class_weights).to(device)

weather_labels = [i for i in weather_df['weather']]
weather_labels.sort()
weather_class_weights = compute_class_weight(class_weight = "balanced", classes=np.unique(weather_labels), y=weather_labels)
weather_class_weights = torch.FloatTensor(weather_class_weights).to(device)

timing_labels = [i for i in timing_df['timing']]
timing_labels.sort()
timing_class_weights = compute_class_weight(class_weight = "balanced", classes=np.unique(timing_labels), y=timing_labels)
timing_class_weights = torch.FloatTensor(timing_class_weights).to(device)

crash_class_weights, weather_class_weights, timing_class_weights


(tensor([0.5049, 1.8262, 2.1198], device='cuda:0'),
 tensor([0.4261, 2.3618, 4.3524], device='cuda:0'),
 tensor([0.5656, 4.3113], device='cuda:0'))

##Train / Validation Split

In [None]:
train_crash, val_crash, _, _ = train_test_split(crash_df, crash_df['ego+crash'], test_size=0.2, random_state=CFG['SEED'], stratify=crash_df['ego+crash'])
print(len(train_crash), len(val_crash))

train_weather, val_weather, _, _ = train_test_split(weather_df, weather_df['weather'], test_size=0.2, random_state=CFG['SEED'], stratify=weather_df['weather'])
print(len(train_weather), len(val_weather))

train_timing, val_timing, _, _ = train_test_split(timing_df, timing_df['timing'], test_size=0.2, random_state=CFG['SEED'], stratify=timing_df['timing'])
print(len(train_timing), len(val_timing))

2152 538
731 183
731 183


In [None]:
#train, val, _, _ = train_test_split(df, df['label'], test_size=0.2, random_state=CFG['SEED'])

In [None]:
class CustomDataset(Dataset):
    def __init__(self, video_path_list, label_list):
        self.video_path_list = video_path_list
        self.label_list = label_list
        #self.frames_list = []

        # for video in tqdm(self.video_path_list):
        #     sub_frames = []
        #     p = '/content/drive/MyDrive/car_data'
        #     #cap = cv2.VideoCapture(p+path[1:])
        #     cap = cv2.VideoCapture(p+video[1:])
        #     for ii in range(CFG['VIDEO_LENGTH']):
        #         _, img = cap.read()
        #         img = cv2.resize(img, (CFG['IMG_SIZE'], CFG['IMG_SIZE']))
        #         img = img/255
        #         sub_frames.append(img)
        #     frame_torch = torch.FloatTensor(np.array(sub_frames)).permute(3, 0, 1, 2)
        #     self.frames_list.append(frame_torch)


    def __getitem__(self, index):
        #frames = self.frames_list[index]
        frames = self.get_video(self.video_path_list[index])
        
        if self.label_list is not None:
            label = self.label_list[index]
            return frames, label
        else:
            return frames
        
    def __len__(self):
        return len(self.video_path_list)
    
    def get_video(self, path):
        frames = []
        p = '/content/drive/MyDrive/car_data'
        cap = cv2.VideoCapture(p+path[1:])
        #cap = cv2.VideoCapture(path)
        for _ in range(CFG['VIDEO_LENGTH']):
            _, img = cap.read()
            img = cv2.resize(img, (CFG['IMG_SIZE'], CFG['IMG_SIZE']))
            img = img / 255.
            frames.append(img)
        return torch.FloatTensor(np.array(frames)).permute(3, 0, 1, 2)

##Model

In [None]:
class BaseModel(nn.Module):
    def __init__(self, num_classes):
        super(BaseModel, self).__init__()
        self.model = r3d_18(weights='KINETICS400_V1')
        self.classifier = nn.Sequential(
            nn.Linear(400, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes)
        )
        self.fc = nn.Linear(400, num_classes)
    
    def forward(self, x):
        x = self.model(x)
        x = x.view(x.size(0),-1)
        x = self.classifier(x)
        return x


Loss

In [None]:
#FocalLoss
class FocalLoss(nn.Module):
    def __init__(self, weight=None, gamma=2, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.weight = weight
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, inputs, targets):
        ce_loss = F.cross_entropy(inputs, targets, weight=self.weight, reduction=self.reduction)
        pt = torch.exp(-ce_loss)
        focal_loss = ((1-pt)**self.gamma * ce_loss).mean()
        return focal_loss

Train

In [None]:
def train_fold(model, optimizer, train_loader, val_loader, class_weights, scheduler, device, fold):
    model.to(device)
    #criterion = nn.CrossEntropyLoss().to(device)
    criterion = FocalLoss(weight=class_weights).to(device)
    
    best_val_loss = 9999999
    best_val_score = 0
    best_model = None
    
    #Early_stop
    patience_limit = 4
    patience = 0

    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for videos, labels in tqdm(iter(train_loader)):
            videos = videos.to(device)#float
            labels = labels.to(device)#long
            
            optimizer.zero_grad()

            output = model(videos)
            loss = criterion(output, labels)
            #loss = loss / CFG['ACCUMULATION']
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
                    
        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val F1 : [{_val_score:.5f}]')
        
        if scheduler is not None:
            #scheduler.step(_val_score)
            scheduler.step(_val_loss)
            
        if best_val_loss > _val_loss:
            best_val_loss = _val_loss
            best_val_score = _val_score
            best_model = model
            patience = 0
        else:
            patience += 1
            if patience >= patience_limit:
                break
    
    torch.save(best_model.state_dict(), f'{path}/best_model.pt')
    print(f'Best Val Loss : [{best_val_loss:.5f}] Best Val F1 : [{best_val_score:.5f}]')

    
    return best_model, best_val_loss, best_val_score

In [None]:
def train(model, optimizer, train_loader, val_loader, class_weights, scheduler, device):
    model.to(device)
    #criterion = nn.CrossEntropyLoss().to(device)
    criterion = FocalLoss(weight=class_weights).to(device)
    
    best_val_loss = 9999999
    best_val_score = 0
    best_model = None
    
    #Early_stop
    patience_limit = 4
    patience = 0

    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for videos, labels in tqdm(iter(train_loader)):
            videos = videos.to(device)#float
            labels = labels.to(device)#long
            
            optimizer.zero_grad()

            output = model(videos)
            loss = criterion(output, labels)
            #loss = loss / CFG['ACCUMULATION']
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
                    
        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val F1 : [{_val_score:.5f}]')
        
        if scheduler is not None:
            #scheduler.step(_val_score)
            scheduler.step(_val_loss)
        if best_val_loss > _val_loss:
            best_val_loss = _val_loss
            best_val_score = _val_score
            best_model = model
            patience = 0
            print(f'patience : {patience}')
        else:
            patience += 1
            print(f'patience : {patience}')

            if patience >= patience_limit:
                break
    
    torch.save(best_model.state_dict(), f'{path}/best_model.pt')
    print(f'Best Val Loss : [{best_val_loss:.5f}] Best Val F1 : [{best_val_score:.5f}]')

    
    return best_model, best_val_loss, best_val_score

In [None]:
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss = []
    preds, trues = [], []
    
    with torch.no_grad():
        for videos, labels in tqdm(iter(val_loader)):
            #labels = labels.type(torch.LongTensor)
            videos = videos.to(device)
            labels = labels.to(device)
            
            logit = model(videos)
            
            loss = criterion(logit, labels)
            
            val_loss.append(loss.item())
            
            preds += logit.argmax(1).detach().cpu().numpy().tolist()
            trues += labels.detach().cpu().numpy().tolist()
        
        _val_loss = np.mean(val_loss)
    
    _val_score = f1_score(trues, preds, average='macro')
    return _val_loss, _val_score

In [None]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    preds = []
    with torch.no_grad():
        for videos in tqdm(iter(test_loader)):
            videos = videos.to(device)
            
            logit = model(videos)

            preds += logit.argmax(1).detach().cpu().numpy().tolist()
    return preds

In [None]:
train_weather.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 731 entries, 374 to 2184
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   sample_id   731 non-null    object
 1   video_path  731 non-null    object
 2   weather     731 non-null    int64 
dtypes: int64(1), object(2)
memory usage: 22.8+ KB


### Datasets, DataLoader

#Train

crash


In [None]:
#crash
train_dataset = CustomDataset(train_crash['video_path'].values, train_crash['ego+crash'].values)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

val_dataset = CustomDataset(val_crash['video_path'].values, val_crash['ego+crash'].values)
val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)


In [None]:
load_model = False
if load_model:
    model = BaseModel(num_classes=3)
    model.load_state_dict(torch.load(f'{path}/model/model.pt'))
else:
    model = BaseModel(num_classes=3)

model.eval()
optimizer = torch.optim.AdamW(params = model.parameters(), lr = CFG["LEARNING_RATE"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2,threshold_mode='abs',min_lr=1e-9, verbose=True)
crash_ego_model, crash_ego_best_score, crash_ego_best_loss = train(model, optimizer, train_loader, val_loader, crash_class_weights, scheduler, device)

#자원관리
gc.collect()
torch.cuda.empty_cache()

#모델 세이브
torch.save(crash_ego_model.state_dict(), f'{path}/crash_best_model.pt')


  0%|          | 0/135 [00:00<?, ?it/s]

OutOfMemoryError: ignored

weather

Kfold

In [None]:
# #using Kfold
# from sklearn.model_selection import KFold
# kfold = KFold(n_splits=CFG['FOLD'], shuffle=True, random_state=CFG['SEED'])
# folds = list(kfold.split(weather_df))


In [None]:
# load_model = False
# train_loss, valid_loss, valid_score = [], [], []
# for fold in range(CFG['FOLD']):
#     print(f"{'='*20} Fold: {fold} {'='*20}")
#     train_idx, val_idx = folds[fold]
#     train_fold = weather_df.iloc[train_idx].reset_index(drop=True)
#     val_fold = weather_df.iloc[val_idx].reset_index(drop=True)
#     train_dataset = CustomDataset(train_fold['video_path'].values, train_fold['weather'].values)
#     train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

#     val_dataset = CustomDataset(val_fold['video_path'].values, val_fold['weather'].values)
#     val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)


#     if load_model:
#         model = BaseModel(num_classes=3)
#         model.load_state_dict(torch.load(f'{path}/model/model.pt'))
#     else:
#         model = BaseModel(num_classes=3)

#     model.eval()
#     optimizer = torch.optim.AdamW(params = model.parameters(), lr = CFG["LEARNING_RATE"])
#     scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2,threshold_mode='abs',min_lr=1e-9, verbose=True)
#     weather_model, weather_best_score, weather_best_loss = train(model, optimizer, train_loader, val_loader, weather_class_weights, scheduler, device)

#     #자원관리
#     gc.collect()
#     torch.cuda.empty_cache()

#     #모델 세이브 
#     torch.save(weather_model.state_dict(), f'{path}/weather_best_model.pt')




  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.54606] Val Loss : [0.49774] Val F1 : [0.27100]


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.48525] Val Loss : [0.47183] Val F1 : [0.26604]


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.45283] Val Loss : [0.45378] Val F1 : [0.27368]


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.40351] Val Loss : [0.42573] Val F1 : [0.34741]


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.38662] Val Loss : [0.41137] Val F1 : [0.36081]
Best Val Loss : [0.41137] Best Val F1 : [0.36081]


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.57817] Val Loss : [0.53920] Val F1 : [0.25895]


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.51981] Val Loss : [0.52139] Val F1 : [0.27198]


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.47067] Val Loss : [0.49366] Val F1 : [0.29184]


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.43338] Val Loss : [0.47724] Val F1 : [0.28266]


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.40540] Val Loss : [0.45328] Val F1 : [0.31939]
Best Val Loss : [0.45328] Best Val F1 : [0.31939]


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.61802] Val Loss : [0.53744] Val F1 : [0.19618]


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.56006] Val Loss : [0.50783] Val F1 : [0.21019]


  0%|          | 0/46 [00:00<?, ?it/s]

KeyboardInterrupt: ignored

In [None]:
#weather
train_dataset = CustomDataset(train_weather['video_path'].values, train_weather['weather'].values)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

val_dataset = CustomDataset(val_weather['video_path'].values, val_weather['weather'].values)
val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)


In [None]:
load_model = True
if load_model:
    model = BaseModel(num_classes=3)
    model.load_state_dict(torch.load(f'{path}/weather_best_model.pt'))
else:
    model = BaseModel(num_classes=3)

model.eval()
optimizer = torch.optim.AdamW(params = model.parameters(), lr = CFG["LEARNING_RATE"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2,threshold_mode='abs',min_lr=1e-9, verbose=True)
weather_model, weather_best_score, weather_best_loss = train(model, optimizer, train_loader, val_loader, weather_class_weights, scheduler, device)

#자원관리
gc.collect()
torch.cuda.empty_cache()

#모델 세이브 
torch.save(weather_model.state_dict(), f'{path}/weather_best_model.pt')


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.00735] Val Loss : [0.24331] Val F1 : [0.61919]
patience : 0


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.00649] Val Loss : [0.26866] Val F1 : [0.61495]
patience : 1


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.00443] Val Loss : [0.23873] Val F1 : [0.64243]
patience : 0


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.00353] Val Loss : [0.26205] Val F1 : [0.61782]
patience : 1


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.00371] Val Loss : [0.27844] Val F1 : [0.62086]
patience : 2


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.00291] Val Loss : [0.26135] Val F1 : [0.61338]
Epoch 00006: reducing learning rate of group 0 to 7.5000e-07.
patience : 3


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.00247] Val Loss : [0.26053] Val F1 : [0.61919]
patience : 4
Best Val Loss : [0.23873] Best Val F1 : [0.64243]


timing

In [None]:
#timing
train_dataset = CustomDataset(train_timing['video_path'].values, train_timing['timing'].values)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

val_dataset = CustomDataset(val_timing['video_path'].values, val_timing['timing'].values)
val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)


In [None]:
load_model = True
if load_model:
    model = BaseModel(num_classes=2)
    model.load_state_dict(torch.load(f'{path}/timing_best_model.pt'))
else:
    model = BaseModel(num_classes=2)

model.eval()
optimizer = torch.optim.AdamW(params = model.parameters(), lr = CFG["LEARNING_RATE"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2,threshold_mode='abs',min_lr=1e-9, verbose=True)
timing_model, timing_best_score, timing_best_loss = train(model, optimizer, train_loader, val_loader, timing_class_weights, scheduler, device)

#자원관리
gc.collect()
torch.cuda.empty_cache()

#파일 세이브
torch.save(timing_model.state_dict(), f'{path}/timing_best_model.pt')


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.02936] Val Loss : [0.04201] Val F1 : [0.87640]
patience : 0


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.02512] Val Loss : [0.04017] Val F1 : [0.89955]
patience : 0


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.01988] Val Loss : [0.03645] Val F1 : [0.89955]
patience : 0


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.01677] Val Loss : [0.03713] Val F1 : [0.89955]
patience : 1


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.01462] Val Loss : [0.03600] Val F1 : [0.89955]
patience : 0


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.01436] Val Loss : [0.03444] Val F1 : [0.89955]
patience : 0


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.00901] Val Loss : [0.03588] Val F1 : [0.91191]
patience : 1


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.01092] Val Loss : [0.03455] Val F1 : [0.91191]
patience : 2


  0%|          | 0/46 [00:00<?, ?it/s]

error: ignored

##Inference

In [None]:
test = pd.read_csv(f'{path}/test.csv')
#test['video_path'] = test['video_path'].apply(lambda x: x.replace('./test/', f'./{path}/test/'))#파일 path 바꾸끼

In [None]:
test_dataset = CustomDataset(test['video_path'].values, None)
test_dataloader = DataLoader(test_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

In [None]:
def inference(model, test_dataloader, device):
    model.to(device)
    model.eval()

    preds = []
    with torch.no_grad():
        for videos in tqdm(iter(test_dataloader)):
            videos = videos.to(device)
            
            logit = model(videos)

            preds += logit.argmax(1).detach().cpu().numpy().tolist()
    return preds

loading Model

In [None]:
crash_mw = torch.load(f'{path}/crash_best_model.pt')
crash_model = BaseModel(num_classes=3)
crash_model.load_state_dict(crash_mw)

weather_mw = torch.load(f'{path}/weather_best_model.pt')
weather_model = BaseModel(num_classes=3)
weather_model.load_state_dict(weather_mw)

timing_mw = torch.load(f'{path}/timing_best_model.pt')
timing_model = BaseModel(num_classes=2)
timing_model.load_state_dict(timing_mw)



<All keys matched successfully>

###Preds

In [None]:
crash_preds = inference(crash_model, test_dataloader, device)
weather_preds = inference(weather_model, test_dataloader, device)
timing_preds = inference(timing_model, test_dataloader, device)


  0%|          | 0/113 [00:00<?, ?it/s]

  0%|          | 0/113 [00:00<?, ?it/s]

  0%|          | 0/113 [00:00<?, ?it/s]

In [None]:
#ego+crash, weather, timing
label_dict = {1:[1,0,0], 2:[1,0,1], 3:[1,1,0], 4:[1,1,1], 5:[1,2,0],6:[1,2,1],7:[2,0,0],8:[2,0,1],9:[2,1,0],10:[2,1,1],11:[2,2,0],12:[2,2,1]}
label_dict_rev = {(1,0,0):1, (1,0,1):2, (1,1,0):3, (1,1,1):4, (1,2,0):5, (1,2,1):6, (2,0,0):7, (2,0,1):8, (2,1,0):9, (2,1,1):10, (2,2,0):11, (2,2,1):12}


In [None]:
preds = []
for crash_pred, weather_pred, timing_pred in zip(crash_preds, weather_preds, timing_preds):
    if crash_pred == 0:
        preds.append(0)
    else:
        temp = (crash_pred, weather_pred, timing_pred)
        preds.append(label_dict_rev[temp])

In [None]:
submit = pd.read_csv(f'{path}/sample_submission.csv')

In [None]:
submit['label'] = preds
submit.head()

Unnamed: 0,sample_id,label
0,TEST_0000,2
1,TEST_0001,0
2,TEST_0002,8
3,TEST_0003,0
4,TEST_0004,0


In [None]:
submit.to_csv('./baseline_submit2.csv', index=False)

In [None]:
from google.colab import files
files.download('baseline_submit2.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>