## Import

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()


from sklearn.metrics import f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import Normalizer


import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.auto import tqdm
import random

import shap

import warnings
warnings.filterwarnings(action='ignore') 

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('mps')

## Hyperparameter setting

In [2]:
CFG = {
    'EPOCHS': 100,
    'LEARNING_RATE':1e-2,
    'BATCH_SIZE':256,
    'SEED':41
}

## Fixed RandomSeed

In [3]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED'])

## Data Load

In [4]:
data_path = './data/'
train = pd.read_csv(f'{data_path}train.csv')
test = pd.read_csv(f'{data_path}test.csv')

## Data Preprocessing
#### 1. 결측치 처리
#### 2. Train / Validation 분할
#### 3. Data label-encoding, scaling

In [5]:
categorical_features = ['COMPONENT_ARBITRARY', 'YEAR']
# Inference(실제 진단 환경)에 사용하는 컬럼
test_stage_features = ['COMPONENT_ARBITRARY', 'ANONYMOUS_1', 'YEAR' , 'ANONYMOUS_2', 'AG', 'CO', 'CR', 'CU', 'FE', 'H2O', 'MN', 'MO', 'NI', 'PQINDEX', 'TI', 'V', 'V40', 'ZN']

In [6]:
train = train.fillna(0)
test = test.fillna(0)

In [7]:
all_X = train.drop(['ID', 'Y_LABEL'], axis = 1)
all_y = train['Y_LABEL']

test = test.drop(['ID'], axis = 1)

train_X, val_X, train_y, val_y = train_test_split(all_X, all_y, test_size=0.2, random_state=CFG['SEED'], stratify=all_y)

In [8]:
def get_values(value):
    return value.values.reshape(-1, 1)

for col in train_X.columns:
    if col not in categorical_features:
        scaler = StandardScaler()
        # scaler = RobustScaler()
        # scaler = Normalizer()
        train_X[col] = scaler.fit_transform(get_values(train_X[col]))
        val_X[col] = scaler.transform(get_values(val_X[col]))
        if col in test.columns:
            test[col] = scaler.transform(get_values(test[col]))
            
le = LabelEncoder()
for col in categorical_features:    
    train_X[col] = le.fit_transform(train_X[col])
    val_X[col] = le.transform(val_X[col])
    if col in test.columns:
        test[col] = le.transform(test[col])

## CustomDataset

In [9]:
test

Unnamed: 0,COMPONENT_ARBITRARY,ANONYMOUS_1,YEAR,ANONYMOUS_2,AG,CO,CR,CU,FE,H2O,MN,MO,NI,PQINDEX,TI,V,V40,ZN
0,0,-0.222251,9,-0.356161,-0.150219,-0.083774,-0.108937,-0.267691,-0.304078,-0.038678,-0.237397,-0.402591,-0.186244,-0.263113,-0.107912,-0.107095,-0.355462,0.946273
1,2,-0.079324,4,-0.356161,-0.150219,-0.083774,-0.043930,-0.267691,0.183372,-0.038678,0.018169,-0.402591,-0.186244,1.517108,0.052343,-0.107095,0.341621,-1.085701
2,1,-0.272109,3,-0.356161,-0.150219,-0.083774,-0.108937,-0.147029,-0.316906,-0.038678,-0.237397,-0.402591,-0.186244,-0.262459,-0.107912,-0.107095,-1.275768,0.236306
3,2,-0.409338,2,-0.356161,-0.150219,-0.083774,-0.011426,-0.243558,-0.027368,-0.038678,0.103358,-0.352325,-0.186244,4.967021,-0.107912,-0.107095,0.652958,-0.931279
4,1,1.210105,6,-0.356161,-0.150219,-0.083774,-0.108937,-0.227470,-0.302246,-0.038678,-0.237397,-0.402591,-0.186244,-0.259189,-0.107912,-0.107095,-0.901771,-0.225078
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6036,2,-0.335738,7,-0.356161,-0.150219,-0.083774,-0.011426,0.770002,1.592582,-0.038678,5.299878,-0.318814,0.323239,1.037717,-0.107912,-0.107095,-0.749039,1.081864
6037,2,0.238107,9,-0.356161,-0.150219,-0.083774,0.053581,-0.259647,1.022668,-0.038678,0.188547,-0.402591,-0.186244,0.183577,-0.107912,-0.107095,0.555053,-1.083818
6038,2,0.284166,7,-0.356161,-0.150219,-0.083774,-0.108937,-0.275735,-0.228945,-0.038678,-0.237397,-0.402591,-0.186244,-0.224527,-0.107912,-0.107095,3.774165,-1.083818
6039,1,-0.418835,6,-0.356161,-0.150219,-0.083774,-0.108937,0.223001,-0.322404,-0.038678,-0.237397,-0.402591,-0.186244,-0.266383,-0.107912,-0.107095,-1.142617,-0.208129


In [10]:
class CustomDataset(Dataset):
    def __init__(self, data_X, data_y, distillation=False):
        super(CustomDataset, self).__init__()
        self.data_X = data_X
        self.data_y = data_y
        self.distillation = distillation
        
    def __len__(self):
        return len(self.data_X)
    
    def __getitem__(self, index):
        if self.distillation:
            # 지식 증류 학습 시
            teacher_X = torch.Tensor(self.data_X.iloc[index])
            student_X = torch.Tensor(self.data_X[test_stage_features].iloc[index])
            y = self.data_y.values[index]
            return teacher_X, student_X, y
        else:
            if self.data_y is None:
                test_X = torch.Tensor(self.data_X.iloc[index])
                return test_X
            else:
                teacher_X = torch.Tensor(self.data_X.iloc[index])
                y = self.data_y.values[index]
                return teacher_X, y

In [11]:
train_dataset = CustomDataset(train_X, train_y, False)
val_dataset = CustomDataset(val_X, val_y, False)

In [12]:
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False)

## Define Teacher Model

In [13]:
class Teacher(nn.Module):
    def __init__(self):
        super(Teacher, self).__init__()
        self.classifier = nn.Sequential(
            nn.Linear(in_features=52, out_features=256),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(),
            nn.Linear(in_features=256, out_features=1024),
            nn.BatchNorm1d(1024),
            nn.LeakyReLU(),
            nn.Linear(in_features=1024, out_features=256),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(),
            nn.Linear(in_features=256, out_features=1),
            nn.Sigmoid()
        )
        
    def forward(self, x):
        output = self.classifier(x)
        return output

## Teacher Train / Validation

In [14]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    model.to(device)

    best_score = 0
    best_model = None
    criterion = nn.BCELoss().to(device)

    for epoch in range(CFG["EPOCHS"]):
        train_loss = []
  
        model.train()
        for X, y in tqdm(train_loader):
            X = X.float().to(device)
            y = y.float().to(device)
            
            
            optimizer.zero_grad()
            
            y_pred = model(X)
            
            loss = criterion(y_pred, y.reshape(-1, 1))
            loss.backward()
            
            optimizer.step()

            train_loss.append(loss.item())

        val_loss, val_score = validation_teacher(model, val_loader, criterion, device)
        print(f'Epoch [{epoch}], Train Loss : [{np.mean(train_loss) :.5f}] Val Loss : [{np.mean(val_loss) :.5f}] Val F1 Score : [{val_score:.5f}]')

        if scheduler is not None:
            scheduler.step(val_score)
            
        if best_score < val_score:
            best_model = model
            best_score = val_score
        writer.add_scalars("Train", {"Loss/train" : np.mean(train_loss),
                                     "Loss/val" : np.mean(val_loss),
                                     "f1_score" : val_score}, epoch)
    writer.close()
    return best_model 

In [15]:
def competition_metric(true, pred):
    return f1_score(true, pred, average="macro")

def validation_teacher(model, val_loader, criterion, device):
    model.eval()

    val_loss = []
    pred_labels = []
    true_labels = []
    threshold = 0.35
    
    with torch.no_grad():
        for X, y in tqdm(val_loader):
            X = X.float().to(device)
            y = y.float().to(device)
            
            model_pred = model(X.to(device))
            
            loss = criterion(model_pred, y.reshape(-1, 1))
            val_loss.append(loss.item())      
            
            model_pred = model_pred.squeeze(1).to('cpu')  
            pred_labels += model_pred.tolist()
            true_labels += y.tolist()
        
        pred_labels = np.where(np.array(pred_labels) > threshold, 1, 0)
        val_f1 = competition_metric(true_labels, pred_labels)
    return val_loss, val_f1   

## Run (Teacher Model)

In [16]:
model = Teacher()
model.eval()
optimizer = torch.optim.Adam(model.parameters(), lr=CFG['LEARNING_RATE'])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=1, threshold_mode='abs',min_lr=1e-8, verbose=True)

teacher_model = train(model, optimizer, train_loader, val_loader, scheduler, device)
writer.flush()
# shap.initjs()
# explainer = shap.TreeExplainer(model)
# shap_values = explainer.shap_values(train_X)

  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [0], Train Loss : [0.26757] Val Loss : [0.24967] Val F1 Score : [0.74427]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.20002] Val Loss : [0.26592] Val F1 Score : [0.77762]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.19833] Val Loss : [0.30616] Val F1 Score : [0.76774]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.18716] Val Loss : [0.28211] Val F1 Score : [0.76505]
Epoch 00004: reducing learning rate of group 0 to 5.0000e-03.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.16961] Val Loss : [0.29212] Val F1 Score : [0.77982]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.17104] Val Loss : [0.29040] Val F1 Score : [0.79391]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.16557] Val Loss : [0.26562] Val F1 Score : [0.79419]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.15134] Val Loss : [0.25397] Val F1 Score : [0.78674]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.14376] Val Loss : [0.38326] Val F1 Score : [0.78255]
Epoch 00009: reducing learning rate of group 0 to 2.5000e-03.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.14287] Val Loss : [0.33872] Val F1 Score : [0.79193]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.16264] Val Loss : [0.25441] Val F1 Score : [0.78541]
Epoch 00011: reducing learning rate of group 0 to 1.2500e-03.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [11], Train Loss : [0.13639] Val Loss : [0.27354] Val F1 Score : [0.79097]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [12], Train Loss : [0.13366] Val Loss : [0.29229] Val F1 Score : [0.78309]
Epoch 00013: reducing learning rate of group 0 to 6.2500e-04.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [13], Train Loss : [0.12781] Val Loss : [0.29469] Val F1 Score : [0.78403]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [14], Train Loss : [0.12665] Val Loss : [0.29216] Val F1 Score : [0.78600]
Epoch 00015: reducing learning rate of group 0 to 3.1250e-04.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [15], Train Loss : [0.12541] Val Loss : [0.29777] Val F1 Score : [0.79185]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [16], Train Loss : [0.13124] Val Loss : [0.30126] Val F1 Score : [0.79061]
Epoch 00017: reducing learning rate of group 0 to 1.5625e-04.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [17], Train Loss : [0.12210] Val Loss : [0.30210] Val F1 Score : [0.79052]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [18], Train Loss : [0.12204] Val Loss : [0.29777] Val F1 Score : [0.78729]
Epoch 00019: reducing learning rate of group 0 to 7.8125e-05.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [19], Train Loss : [0.12079] Val Loss : [0.29741] Val F1 Score : [0.79454]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [20], Train Loss : [0.12397] Val Loss : [0.30185] Val F1 Score : [0.78801]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [21], Train Loss : [0.11912] Val Loss : [0.31268] Val F1 Score : [0.79391]
Epoch 00022: reducing learning rate of group 0 to 3.9063e-05.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [22], Train Loss : [0.12601] Val Loss : [0.29965] Val F1 Score : [0.78895]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [23], Train Loss : [0.13268] Val Loss : [0.30001] Val F1 Score : [0.79329]
Epoch 00024: reducing learning rate of group 0 to 1.9531e-05.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [24], Train Loss : [0.12849] Val Loss : [0.31137] Val F1 Score : [0.78978]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [25], Train Loss : [0.12157] Val Loss : [0.31156] Val F1 Score : [0.78507]
Epoch 00026: reducing learning rate of group 0 to 9.7656e-06.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [26], Train Loss : [0.11964] Val Loss : [0.28774] Val F1 Score : [0.79336]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [27], Train Loss : [0.13438] Val Loss : [0.29826] Val F1 Score : [0.78751]
Epoch 00028: reducing learning rate of group 0 to 4.8828e-06.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [28], Train Loss : [0.11949] Val Loss : [0.30454] Val F1 Score : [0.79267]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [29], Train Loss : [0.12722] Val Loss : [0.30711] Val F1 Score : [0.78942]
Epoch 00030: reducing learning rate of group 0 to 2.4414e-06.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [30], Train Loss : [0.11930] Val Loss : [0.30082] Val F1 Score : [0.80072]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [31], Train Loss : [0.12064] Val Loss : [0.31331] Val F1 Score : [0.78659]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [32], Train Loss : [0.12002] Val Loss : [0.30243] Val F1 Score : [0.78862]
Epoch 00033: reducing learning rate of group 0 to 1.2207e-06.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [33], Train Loss : [0.12304] Val Loss : [0.30701] Val F1 Score : [0.78781]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [34], Train Loss : [0.14306] Val Loss : [0.31737] Val F1 Score : [0.79630]
Epoch 00035: reducing learning rate of group 0 to 6.1035e-07.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [35], Train Loss : [0.12778] Val Loss : [0.30285] Val F1 Score : [0.78681]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [36], Train Loss : [0.12195] Val Loss : [0.30891] Val F1 Score : [0.78843]
Epoch 00037: reducing learning rate of group 0 to 3.0518e-07.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [37], Train Loss : [0.12239] Val Loss : [0.29731] Val F1 Score : [0.78647]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [38], Train Loss : [0.12477] Val Loss : [0.30015] Val F1 Score : [0.79096]
Epoch 00039: reducing learning rate of group 0 to 1.5259e-07.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [39], Train Loss : [0.12533] Val Loss : [0.31563] Val F1 Score : [0.79911]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [40], Train Loss : [0.12057] Val Loss : [0.32167] Val F1 Score : [0.79070]
Epoch 00041: reducing learning rate of group 0 to 7.6294e-08.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [41], Train Loss : [0.11763] Val Loss : [0.29154] Val F1 Score : [0.78740]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [42], Train Loss : [0.11896] Val Loss : [0.30263] Val F1 Score : [0.78624]
Epoch 00043: reducing learning rate of group 0 to 3.8147e-08.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [43], Train Loss : [0.13311] Val Loss : [0.30831] Val F1 Score : [0.79136]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [44], Train Loss : [0.12068] Val Loss : [0.31989] Val F1 Score : [0.78914]
Epoch 00045: reducing learning rate of group 0 to 1.9073e-08.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [45], Train Loss : [0.12126] Val Loss : [0.31579] Val F1 Score : [0.78702]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [46], Train Loss : [0.12244] Val Loss : [0.30929] Val F1 Score : [0.78871]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [47], Train Loss : [0.11929] Val Loss : [0.30092] Val F1 Score : [0.79321]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [48], Train Loss : [0.12720] Val Loss : [0.31173] Val F1 Score : [0.79245]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [49], Train Loss : [0.12198] Val Loss : [0.32682] Val F1 Score : [0.78762]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [50], Train Loss : [0.11918] Val Loss : [0.30517] Val F1 Score : [0.79245]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [51], Train Loss : [0.14545] Val Loss : [0.31667] Val F1 Score : [0.78740]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [52], Train Loss : [0.13125] Val Loss : [0.30271] Val F1 Score : [0.79375]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [53], Train Loss : [0.12877] Val Loss : [0.31796] Val F1 Score : [0.79112]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [54], Train Loss : [0.12021] Val Loss : [0.30490] Val F1 Score : [0.78702]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [55], Train Loss : [0.11829] Val Loss : [0.29316] Val F1 Score : [0.78611]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [56], Train Loss : [0.12039] Val Loss : [0.29298] Val F1 Score : [0.78924]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [57], Train Loss : [0.12306] Val Loss : [0.29710] Val F1 Score : [0.78772]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [58], Train Loss : [0.11892] Val Loss : [0.33321] Val F1 Score : [0.79271]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [59], Train Loss : [0.11922] Val Loss : [0.30997] Val F1 Score : [0.79023]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [60], Train Loss : [0.12312] Val Loss : [0.30163] Val F1 Score : [0.79096]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [61], Train Loss : [0.11986] Val Loss : [0.29697] Val F1 Score : [0.79096]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [62], Train Loss : [0.13056] Val Loss : [0.29706] Val F1 Score : [0.78936]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [63], Train Loss : [0.12208] Val Loss : [0.30073] Val F1 Score : [0.79253]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [64], Train Loss : [0.11990] Val Loss : [0.31140] Val F1 Score : [0.78905]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [65], Train Loss : [0.12503] Val Loss : [0.29758] Val F1 Score : [0.79127]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [66], Train Loss : [0.12721] Val Loss : [0.30191] Val F1 Score : [0.79616]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [67], Train Loss : [0.12206] Val Loss : [0.29775] Val F1 Score : [0.79413]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [68], Train Loss : [0.12285] Val Loss : [0.29303] Val F1 Score : [0.78879]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [69], Train Loss : [0.12079] Val Loss : [0.29941] Val F1 Score : [0.79321]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [70], Train Loss : [0.12239] Val Loss : [0.31860] Val F1 Score : [0.79039]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [71], Train Loss : [0.12090] Val Loss : [0.29705] Val F1 Score : [0.78730]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [72], Train Loss : [0.12042] Val Loss : [0.30231] Val F1 Score : [0.79006]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [73], Train Loss : [0.11905] Val Loss : [0.30448] Val F1 Score : [0.78843]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [74], Train Loss : [0.13338] Val Loss : [0.30652] Val F1 Score : [0.77961]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [75], Train Loss : [0.11930] Val Loss : [0.31256] Val F1 Score : [0.78822]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [76], Train Loss : [0.12969] Val Loss : [0.31219] Val F1 Score : [0.79432]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [77], Train Loss : [0.11924] Val Loss : [0.29530] Val F1 Score : [0.78751]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [78], Train Loss : [0.12384] Val Loss : [0.32215] Val F1 Score : [0.79426]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [79], Train Loss : [0.12686] Val Loss : [0.31443] Val F1 Score : [0.79185]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [80], Train Loss : [0.11938] Val Loss : [0.28983] Val F1 Score : [0.78884]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [81], Train Loss : [0.12216] Val Loss : [0.29216] Val F1 Score : [0.79773]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [82], Train Loss : [0.12513] Val Loss : [0.31591] Val F1 Score : [0.78905]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [83], Train Loss : [0.13112] Val Loss : [0.32020] Val F1 Score : [0.79649]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [84], Train Loss : [0.12451] Val Loss : [0.31177] Val F1 Score : [0.78914]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [85], Train Loss : [0.12063] Val Loss : [0.29048] Val F1 Score : [0.78729]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [86], Train Loss : [0.12972] Val Loss : [0.31067] Val F1 Score : [0.79368]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [87], Train Loss : [0.12123] Val Loss : [0.30994] Val F1 Score : [0.78933]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [88], Train Loss : [0.12024] Val Loss : [0.29981] Val F1 Score : [0.78800]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [89], Train Loss : [0.12417] Val Loss : [0.29842] Val F1 Score : [0.78933]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [90], Train Loss : [0.12401] Val Loss : [0.30021] Val F1 Score : [0.79023]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [91], Train Loss : [0.12627] Val Loss : [0.30252] Val F1 Score : [0.78772]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [92], Train Loss : [0.12092] Val Loss : [0.30579] Val F1 Score : [0.79031]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [93], Train Loss : [0.12267] Val Loss : [0.30700] Val F1 Score : [0.78975]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [94], Train Loss : [0.11918] Val Loss : [0.30184] Val F1 Score : [0.79079]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [95], Train Loss : [0.12492] Val Loss : [0.31265] Val F1 Score : [0.79096]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [96], Train Loss : [0.12578] Val Loss : [0.30673] Val F1 Score : [0.78905]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [97], Train Loss : [0.12713] Val Loss : [0.30574] Val F1 Score : [0.79274]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [98], Train Loss : [0.12245] Val Loss : [0.30317] Val F1 Score : [0.79014]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [99], Train Loss : [0.12041] Val Loss : [0.32259] Val F1 Score : [0.79229]


In [46]:
train_X = train_X.astype('float32')

In [47]:
train_X.dtypes

COMPONENT_ARBITRARY    float32
ANONYMOUS_1            float32
YEAR                   float32
SAMPLE_TRANSFER_DAY    float32
ANONYMOUS_2            float32
AG                     float32
AL                     float32
B                      float32
BA                     float32
BE                     float32
CA                     float32
CD                     float32
CO                     float32
CR                     float32
CU                     float32
FH2O                   float32
FNOX                   float32
FOPTIMETHGLY           float32
FOXID                  float32
FSO4                   float32
FTBN                   float32
FE                     float32
FUEL                   float32
H2O                    float32
K                      float32
LI                     float32
MG                     float32
MN                     float32
MO                     float32
NA                     float32
NI                     float32
P                      float32
PB      

In [48]:
%%time
e = shap.DeepExplainer(
        model, 
        torch.from_numpy(
            train_X.to_numpy()
        ).to(device))

CPU times: user 30.7 ms, sys: 106 ms, total: 137 ms
Wall time: 544 ms


In [50]:
%%time
# x_samples = train_X[np.random.choice(np.arange(len(train_X)), 300, replace=False)]
# print(len(x_samples))
shap_values = e.shap_values(
    torch.from_numpy(train_X.to_numpy()).to(device)
)

In [None]:
shap_values.shape

In [None]:
import pandas as pd
df = pd.DataFrame({
    "mean_abs_shap": np.mean(np.abs(shap_values), axis=0), 
    "stdev_abs_shap": np.std(np.abs(shap_values), axis=0), 
    "name": train_X.columns
})
df.sort_values("mean_abs_shap", ascending=False)[:10]

In [None]:
shap.summary_plot(shap_values, features=train_X, feature_names=train_X.columns)

In [None]:
# shap.force_plot(explainer.expected_value, shap_values[0, :], train_X.iloc[0, :])

## Define Student Model

In [75]:
class Student(nn.Module):
    def __init__(self):
        super(Student, self).__init__()
        self.classifier = nn.Sequential(
            nn.Linear(in_features=18, out_features=128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(),
            nn.Linear(in_features=128, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(),
            nn.Linear(in_features=512, out_features=128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(),
            nn.Linear(in_features=128, out_features=1),
            nn.Sigmoid()
        )
        
    def forward(self, x):
        output = self.classifier(x)
        return output

## Define Knowledge distillation Loss

In [76]:
def distillation(student_logits, labels, teacher_logits, alpha):
    distillation_loss = nn.BCELoss()(student_logits, teacher_logits)
    student_loss = nn.BCELoss()(student_logits, labels.reshape(-1, 1))
    return alpha * student_loss + (1-alpha) * distillation_loss

In [77]:
def distill_loss(output, target, teacher_output, loss_fn=distillation, opt=optimizer):
    loss_b = loss_fn(output, target, teacher_output, alpha=0.1)

    if opt is not None:
        opt.zero_grad()
        loss_b.backward()
        opt.step()

    return loss_b.item()

## Student Train / Validation

In [78]:
def student_train(s_model, t_model, optimizer, train_loader, val_loader, scheduler, device):
    s_model.to(device)
    t_model.to(device)
    
    best_score = 0
    best_model = None

    for epoch in range(CFG["EPOCHS"]):
        train_loss = []
        s_model.train()
        t_model.eval()
        
        for X_t, X_s, y in tqdm(train_loader):
            X_t = X_t.float().to(device)
            X_s = X_s.float().to(device)
            y = y.float().to(device)
            
            optimizer.zero_grad()

            output = s_model(X_s)
            with torch.no_grad():
                teacher_output = t_model(X_t)
                
            loss_b = distill_loss(output, y, teacher_output, loss_fn=distillation, opt=optimizer)

            train_loss.append(loss_b)

        val_loss, val_score = validation_student(s_model, t_model, val_loader, distill_loss, device)
        print(f'Epoch [{epoch}], Train Loss : [{np.mean(train_loss) :.5f}] Val Loss : [{np.mean(val_loss) :.5f}] Val F1 Score : [{val_score:.5f}]')
        
        if scheduler is not None:
            scheduler.step(val_score)
            
        if best_score < val_score:
            best_model = s_model
            best_score = val_score
        
    return best_model

In [79]:
def validation_student(s_model, t_model, val_loader, criterion, device):
    s_model.eval()
    t_model.eval()

    val_loss = []
    pred_labels = []
    true_labels = []
    threshold = 0.35
    
    with torch.no_grad():
        for X_t, X_s, y in tqdm(val_loader):
            X_t = X_t.float().to(device)
            X_s = X_s.float().to(device)
            y = y.float().to(device)
            
            model_pred = s_model(X_s)
            teacher_output = t_model(X_t)
            
            loss_b = distill_loss(model_pred, y, teacher_output, loss_fn=distillation, opt=None)
            val_loss.append(loss_b)
            
            model_pred = model_pred.squeeze(1).to('cpu')
            pred_labels += model_pred.tolist()
            true_labels += y.tolist()
        
        pred_labels = np.where(np.array(pred_labels) > threshold, 1, 0)
        val_f1 = competition_metric(true_labels, pred_labels)
    return val_loss, val_f1    

## Run (Student Model)

In [80]:
train_dataset = CustomDataset(train_X, train_y, True)
val_dataset = CustomDataset(val_X, val_y, True)

train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False)

In [81]:
student_model = Student()
student_model.eval()
optimizer = torch.optim.Adam(student_model.parameters(), lr=CFG['LEARNING_RATE'])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=1, threshold_mode='abs',min_lr=1e-8, verbose=True)

best_student_model = student_train(student_model, teacher_model, optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [0], Train Loss : [0.31767] Val Loss : [0.29004] Val F1 Score : [0.47758]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.27861] Val Loss : [0.28532] Val F1 Score : [0.48189]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.27978] Val Loss : [0.28503] Val F1 Score : [0.47767]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.28319] Val Loss : [0.27893] Val F1 Score : [0.48504]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.27770] Val Loss : [0.28535] Val F1 Score : [0.50266]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.27526] Val Loss : [0.27572] Val F1 Score : [0.49633]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.27676] Val Loss : [0.28085] Val F1 Score : [0.48936]
Epoch 00007: reducing learning rate of group 0 to 5.0000e-03.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.27052] Val Loss : [0.27582] Val F1 Score : [0.49354]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.27045] Val Loss : [0.27586] Val F1 Score : [0.49616]
Epoch 00009: reducing learning rate of group 0 to 2.5000e-03.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.27247] Val Loss : [0.27698] Val F1 Score : [0.49354]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.27193] Val Loss : [0.27556] Val F1 Score : [0.50425]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [11], Train Loss : [0.26656] Val Loss : [0.27769] Val F1 Score : [0.50855]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [12], Train Loss : [0.26930] Val Loss : [0.27650] Val F1 Score : [0.51053]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [13], Train Loss : [0.26907] Val Loss : [0.27721] Val F1 Score : [0.49290]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [14], Train Loss : [0.26959] Val Loss : [0.27714] Val F1 Score : [0.50576]
Epoch 00015: reducing learning rate of group 0 to 1.2500e-03.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [15], Train Loss : [0.26750] Val Loss : [0.27756] Val F1 Score : [0.52194]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [16], Train Loss : [0.26457] Val Loss : [0.27809] Val F1 Score : [0.51773]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [17], Train Loss : [0.26317] Val Loss : [0.27643] Val F1 Score : [0.49892]
Epoch 00018: reducing learning rate of group 0 to 6.2500e-04.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [18], Train Loss : [0.26354] Val Loss : [0.27674] Val F1 Score : [0.51822]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [19], Train Loss : [0.26179] Val Loss : [0.27664] Val F1 Score : [0.51075]
Epoch 00020: reducing learning rate of group 0 to 3.1250e-04.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [20], Train Loss : [0.26086] Val Loss : [0.27761] Val F1 Score : [0.52091]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [21], Train Loss : [0.25967] Val Loss : [0.28007] Val F1 Score : [0.52116]
Epoch 00022: reducing learning rate of group 0 to 1.5625e-04.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [22], Train Loss : [0.26557] Val Loss : [0.27781] Val F1 Score : [0.50986]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [23], Train Loss : [0.26502] Val Loss : [0.27969] Val F1 Score : [0.52378]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [24], Train Loss : [0.26168] Val Loss : [0.28072] Val F1 Score : [0.51123]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [25], Train Loss : [0.26074] Val Loss : [0.27937] Val F1 Score : [0.51474]
Epoch 00026: reducing learning rate of group 0 to 7.8125e-05.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [26], Train Loss : [0.26462] Val Loss : [0.27672] Val F1 Score : [0.51569]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [27], Train Loss : [0.25911] Val Loss : [0.27774] Val F1 Score : [0.51922]
Epoch 00028: reducing learning rate of group 0 to 3.9063e-05.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [28], Train Loss : [0.26012] Val Loss : [0.27892] Val F1 Score : [0.51545]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [29], Train Loss : [0.26517] Val Loss : [0.27783] Val F1 Score : [0.52142]
Epoch 00030: reducing learning rate of group 0 to 1.9531e-05.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [30], Train Loss : [0.26158] Val Loss : [0.27784] Val F1 Score : [0.51236]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [31], Train Loss : [0.25924] Val Loss : [0.27815] Val F1 Score : [0.52378]
Epoch 00032: reducing learning rate of group 0 to 9.7656e-06.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [32], Train Loss : [0.26479] Val Loss : [0.27712] Val F1 Score : [0.52825]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [33], Train Loss : [0.26288] Val Loss : [0.27771] Val F1 Score : [0.51521]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [34], Train Loss : [0.25892] Val Loss : [0.27699] Val F1 Score : [0.52632]
Epoch 00035: reducing learning rate of group 0 to 4.8828e-06.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [35], Train Loss : [0.25997] Val Loss : [0.27750] Val F1 Score : [0.52687]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [36], Train Loss : [0.26054] Val Loss : [0.27793] Val F1 Score : [0.51259]
Epoch 00037: reducing learning rate of group 0 to 2.4414e-06.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [37], Train Loss : [0.26100] Val Loss : [0.27837] Val F1 Score : [0.51569]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [38], Train Loss : [0.26209] Val Loss : [0.27777] Val F1 Score : [0.51964]
Epoch 00039: reducing learning rate of group 0 to 1.2207e-06.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [39], Train Loss : [0.25933] Val Loss : [0.27928] Val F1 Score : [0.51865]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [40], Train Loss : [0.25882] Val Loss : [0.27605] Val F1 Score : [0.52220]
Epoch 00041: reducing learning rate of group 0 to 6.1035e-07.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [41], Train Loss : [0.25866] Val Loss : [0.27767] Val F1 Score : [0.52142]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [42], Train Loss : [0.26657] Val Loss : [0.27804] Val F1 Score : [0.52714]
Epoch 00043: reducing learning rate of group 0 to 3.0518e-07.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [43], Train Loss : [0.25940] Val Loss : [0.27915] Val F1 Score : [0.52194]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [44], Train Loss : [0.26252] Val Loss : [0.27874] Val F1 Score : [0.52015]
Epoch 00045: reducing learning rate of group 0 to 1.5259e-07.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [45], Train Loss : [0.26355] Val Loss : [0.27837] Val F1 Score : [0.52040]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [46], Train Loss : [0.26294] Val Loss : [0.27682] Val F1 Score : [0.51545]
Epoch 00047: reducing learning rate of group 0 to 7.6294e-08.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [47], Train Loss : [0.25955] Val Loss : [0.27807] Val F1 Score : [0.52769]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [48], Train Loss : [0.25907] Val Loss : [0.27851] Val F1 Score : [0.52906]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [49], Train Loss : [0.26182] Val Loss : [0.27849] Val F1 Score : [0.51236]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [50], Train Loss : [0.26353] Val Loss : [0.27787] Val F1 Score : [0.52769]
Epoch 00051: reducing learning rate of group 0 to 3.8147e-08.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [51], Train Loss : [0.26230] Val Loss : [0.28036] Val F1 Score : [0.52906]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [52], Train Loss : [0.25892] Val Loss : [0.27832] Val F1 Score : [0.52247]
Epoch 00053: reducing learning rate of group 0 to 1.9073e-08.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [53], Train Loss : [0.27058] Val Loss : [0.27735] Val F1 Score : [0.52378]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [54], Train Loss : [0.26301] Val Loss : [0.27741] Val F1 Score : [0.52065]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [55], Train Loss : [0.26011] Val Loss : [0.27872] Val F1 Score : [0.52878]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [56], Train Loss : [0.26620] Val Loss : [0.27936] Val F1 Score : [0.52769]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [57], Train Loss : [0.26023] Val Loss : [0.27748] Val F1 Score : [0.51847]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [58], Train Loss : [0.26633] Val Loss : [0.27816] Val F1 Score : [0.51259]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [59], Train Loss : [0.25977] Val Loss : [0.27796] Val F1 Score : [0.51773]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [60], Train Loss : [0.26011] Val Loss : [0.27957] Val F1 Score : [0.52040]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [61], Train Loss : [0.26021] Val Loss : [0.27987] Val F1 Score : [0.52168]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [62], Train Loss : [0.26002] Val Loss : [0.27783] Val F1 Score : [0.52458]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [63], Train Loss : [0.26303] Val Loss : [0.28030] Val F1 Score : [0.51403]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [64], Train Loss : [0.26017] Val Loss : [0.27916] Val F1 Score : [0.52405]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [65], Train Loss : [0.26562] Val Loss : [0.27933] Val F1 Score : [0.52040]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [66], Train Loss : [0.25911] Val Loss : [0.28078] Val F1 Score : [0.52405]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [67], Train Loss : [0.26219] Val Loss : [0.28087] Val F1 Score : [0.52378]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [68], Train Loss : [0.26118] Val Loss : [0.27696] Val F1 Score : [0.52512]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [69], Train Loss : [0.26666] Val Loss : [0.27815] Val F1 Score : [0.51773]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [70], Train Loss : [0.25915] Val Loss : [0.27900] Val F1 Score : [0.52714]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [71], Train Loss : [0.25933] Val Loss : [0.27682] Val F1 Score : [0.51666]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [72], Train Loss : [0.26058] Val Loss : [0.27658] Val F1 Score : [0.51497]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [73], Train Loss : [0.26121] Val Loss : [0.27700] Val F1 Score : [0.51521]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [74], Train Loss : [0.25872] Val Loss : [0.27842] Val F1 Score : [0.51914]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [75], Train Loss : [0.27270] Val Loss : [0.27814] Val F1 Score : [0.52378]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [76], Train Loss : [0.26073] Val Loss : [0.27864] Val F1 Score : [0.51675]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [77], Train Loss : [0.26354] Val Loss : [0.27663] Val F1 Score : [0.51213]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [78], Train Loss : [0.25904] Val Loss : [0.27858] Val F1 Score : [0.52142]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [79], Train Loss : [0.25918] Val Loss : [0.27927] Val F1 Score : [0.52632]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [80], Train Loss : [0.25971] Val Loss : [0.28237] Val F1 Score : [0.52795]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [81], Train Loss : [0.25815] Val Loss : [0.27954] Val F1 Score : [0.52768]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [82], Train Loss : [0.26183] Val Loss : [0.27757] Val F1 Score : [0.52797]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [83], Train Loss : [0.26363] Val Loss : [0.27869] Val F1 Score : [0.52659]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [84], Train Loss : [0.26062] Val Loss : [0.27822] Val F1 Score : [0.50791]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [85], Train Loss : [0.25952] Val Loss : [0.27838] Val F1 Score : [0.52405]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [86], Train Loss : [0.26039] Val Loss : [0.27804] Val F1 Score : [0.51724]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [87], Train Loss : [0.25960] Val Loss : [0.27950] Val F1 Score : [0.52352]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [88], Train Loss : [0.25866] Val Loss : [0.27706] Val F1 Score : [0.51236]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [89], Train Loss : [0.26694] Val Loss : [0.27661] Val F1 Score : [0.52168]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [90], Train Loss : [0.26009] Val Loss : [0.27828] Val F1 Score : [0.52196]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [91], Train Loss : [0.26003] Val Loss : [0.27708] Val F1 Score : [0.51964]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [92], Train Loss : [0.25900] Val Loss : [0.27688] Val F1 Score : [0.52431]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [93], Train Loss : [0.25936] Val Loss : [0.27785] Val F1 Score : [0.52498]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [94], Train Loss : [0.25910] Val Loss : [0.27694] Val F1 Score : [0.51773]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [95], Train Loss : [0.25988] Val Loss : [0.27817] Val F1 Score : [0.53048]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [96], Train Loss : [0.26611] Val Loss : [0.27971] Val F1 Score : [0.52906]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [97], Train Loss : [0.25995] Val Loss : [0.27970] Val F1 Score : [0.52065]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [98], Train Loss : [0.25925] Val Loss : [0.27908] Val F1 Score : [0.52991]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [99], Train Loss : [0.25897] Val Loss : [0.27801] Val F1 Score : [0.52551]


## Choose Inference Threshold

In [82]:
def choose_threshold(model, val_loader, device):
    model.to(device)
    model.eval()
    
    thresholds = [0.1, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]
    pred_labels = []
    true_labels = []
    
    best_score = 0
    best_thr = None
    with torch.no_grad():
        for _, x_s, y in tqdm(iter(val_loader)):
            x_s = x_s.float().to(device)
            y = y.float().to(device)
            
            model_pred = model(x_s)
            
            model_pred = model_pred.squeeze(1).to('cpu')
            pred_labels += model_pred.tolist()
            true_labels += y.tolist()
        
        for threshold in thresholds:
            pred_labels_thr = np.where(np.array(pred_labels) > threshold, 1, 0)
            score_thr = competition_metric(true_labels, pred_labels_thr)
            if best_score < score_thr:
                best_score = score_thr
                best_thr = threshold
    return best_thr, best_score

In [83]:
best_threshold, best_score = choose_threshold(best_student_model, val_loader, device)
print(f'Best Threshold : [{best_threshold}], Score : [{best_score:.5f}]')

  0%|          | 0/12 [00:00<?, ?it/s]

Best Threshold : [0.2], Score : [0.54406]


## Inference

In [84]:
test_datasets = CustomDataset(test, None, False)
test_loaders = DataLoader(test_datasets, batch_size = CFG['BATCH_SIZE'], shuffle=False)

In [85]:
def inference(model, test_loader, threshold, device):
    model.to(device)
    model.eval()
    
    test_predict = []
    with torch.no_grad():
        for x in tqdm(test_loader):
            x = x.float().to(device)
            model_pred = model(x)

            model_pred = model_pred.squeeze(1).to('cpu')
            test_predict += model_pred
        
    test_predict = np.where(np.array(test_predict) > threshold, 1, 0)
    print('Done.')
    return test_predict

In [86]:
preds = inference(best_student_model, test_loaders, best_threshold, device)

  0%|          | 0/24 [00:00<?, ?it/s]

Done.


## Submit

In [87]:
submit = pd.read_csv(f'{data_path}sample_submission.csv')
submit['Y_LABEL'] = preds
submit.head()

Unnamed: 0,ID,Y_LABEL
0,TEST_0000,0
1,TEST_0001,0
2,TEST_0002,0
3,TEST_0003,0
4,TEST_0004,0


In [88]:
submit.to_csv('./submit.csv', index=False)

In [89]:
pd.read_csv('submit.csv')['Y_LABEL'].mean()

0.09766594934613475