In [1]:
# Some imports that you might need
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objs as go

import os
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
import torch 
from torchvision.datasets.utils import download_url
import torchvision as tv
import torchvision.transforms as T
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from torch.utils.data import Dataset, DataLoader, IterableDataset

In [2]:
!pip install albumentations
!pip install timm

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com


In [3]:
# ====================================================
# libraries
# ====================================================

import sys
# sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')
import os
import math
import time
import random
import shutil
from pathlib import Path
from contextlib import contextmanager
from collections import defaultdict, Counter
import scipy as sp
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from tqdm.auto import tqdm
from functools import partial
import cv2
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import ReduceLROnPlateau
from albumentations import (Compose, Normalize, Resize, RandomResizedCrop, HorizontalFlip, VerticalFlip, ShiftScaleRotate, Transpose)
from albumentations.pytorch import ToTensorV2
from albumentations import ImageOnlyTransform
import timm
import warnings 
warnings.filterwarnings('ignore')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
from matplotlib import pyplot as plt
import joblib

To get the data, you need to accept the terms and conditions of the Kaggle competition and click "Download All" at https://www.kaggle.com/competitions/cassava-leaf-disease-classification/data.

You can store the data in your Google Drive and mount the drive in the following cell to access it.

In [9]:
# Mount Google Drive under /content/drive so the archive stored there can be read.
# NOTE(review): this cell presumably only works on Google Colab - skip it elsewhere.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Extract the competition archive stored on Drive into model_input.
# `yes N` answers every unzip "replace?" prompt with N (none), so files that already exist are kept.
!yes N | unzip "/content/drive/MyDrive/CIS519/cassava-leaf-disease-classification.zip" -d "/content/drive/MyDrive/CIS519/model_input"

In [None]:
# Extract the local training-image archive into ./model_input.
# `yes N` answers every unzip "replace?" prompt with N (none), so files that already exist are kept.
!yes N | unzip "./train_images.zip" -d "./model_input"

In [14]:
# Extract the local test-image archive into ./model_input.
# `yes N` answers every unzip "replace?" prompt with N (none), so files that already exist are kept.
!yes N | unzip "./test_images.zip" -d "./model_input"

Archive:  ./test_images.zip
   creating: ./model_input/test_images/
  inflating: ./model_input/test_images/2216849948.jpg  
yes: standard output: Broken pipe
yes: write error


In [11]:
# NOTE(review): this removes ./model_input/train_images, the folder TRAIN_PATH points to.
# Running the notebook top to bottom would delete the images the training cells read;
# confirm this clean-up cell is still wanted before executing it.
!rm -r ./model_input/train_images

In [7]:
# Remember to change the paths here to point at the files you need
train = pd.read_csv('./model_input/train.csv')
# The test frame is read from the sample submission file.
test = pd.read_csv('./model_input/sample_submission.csv')
# orient='index' turns each top-level JSON key into a row label.
label_map = pd.read_json('./model_input/label_num_to_disease_map.json', 
                         orient='index')
# Quick look at what was loaded.
display(train.head())
display(test.head())
display(label_map)

Unnamed: 0,image_id,label
0,1000015157.jpg,0
1,1000201771.jpg,3
2,100042118.jpg,1
3,1000723321.jpg,1
4,1000812911.jpg,3


Unnamed: 0,image_id,label
0,2216849948.jpg,4


Unnamed: 0,0
0,Cassava Bacterial Blight (CBB)
1,Cassava Brown Streak Disease (CBSD)
2,Cassava Green Mottle (CGM)
3,Cassava Mosaic Disease (CMD)
4,Healthy


In [None]:
# Hold out 10% of the training rows for validation; random_state pins the shuffle.
# NOTE(review): the split is not stratified by label - confirm that is intended.
training_df,validation_df=train_test_split(train,test_size=0.1, random_state=42)

In [None]:
class CassavaDataset(Dataset):
    """Dataset that opens images with PIL from `<data_path>/<train_images|test_images>`.

    Args:
        df: DataFrame whose rows are unpacked as `(image file name, label)`,
            so it must have exactly two columns in that order.
        data_path: root folder that contains the image sub-folder and the csv files.
        mode: "train" selects `train_images`/`train.csv`; any other value selects
            `test_images`/`sample_submission.csv`.
        transform: optional callable applied to the PIL image before it is returned.
    """

    def __init__(self,df,data_path,mode="train",transform=None):
        super().__init__()
        self.data_path=data_path
        # The csv path is only recorded, never read here; the rows come from `df`.
        csv_name = "train.csv" if mode == "train" else "sample_submission.csv"
        self.csv_file_path = data_path+"/"+csv_name
        self.data = df.values
        self.transform = transform
        self.data_dir = "train_images" if mode == "train" else "test_images"

    def __len__(self):
        """Number of rows in the backing DataFrame."""
        return len(self.data)

    def __getitem__(self, index):
        """Return `(image, label)` for row `index`; the image is RGB and optionally transformed."""
        img_name, label = self.data[index]
        img_path = os.path.join(self.data_path,self.data_dir,img_name)
        image = Image.open(img_path).convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        return image, label



In [None]:
# Both splits read their images from the same competition folder on Drive.
CASSAVA_ROOT = "/content/drive/MyDrive/CIS519/CIS519project/model_input/cassava-leaf-disease-classification"

train_dataset = CassavaDataset(df=training_df, data_path=CASSAVA_ROOT, transform=None)
validation_dataset = CassavaDataset(df=validation_df, data_path=CASSAVA_ROOT, transform=None)

In [4]:
# ====================================================
# directory settings
# ====================================================

import os

# The trailing slash matters: any code that builds a path as OUTPUT_DIR + '<file name>'
# would otherwise write next to the folder (e.g. './model_outputtrain.log')
# instead of inside it.
OUTPUT_DIR = './model_output/'
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Folders the image datasets read from.
TRAIN_PATH = './model_input/train_images'
TEST_PATH = './model_input/test_images'

In [14]:
# ====================================================
# CFG
# ====================================================

class CFG:
    """Hyper-parameters and run settings, read by the other cells as CFG.<name>."""

    # --- model / input ---
    model_name = 'resnext50_32x4d'
    size = 512                       # side length passed to the resize/crop transforms
    target_size = 5                  # output width of the replacement classifier head
    target_col = 'label'

    # --- optimisation ---
    epochs = 5
    batch_size = 16
    lr = 1e-4
    weight_decay = 1e-6
    gradient_accumulation_steps = 1
    max_grad_norm = 1000

    # --- ReduceLROnPlateau arguments ---
    factor = 0.2
    patience = 5
    eps = 1e-6
    min_lr = 1e-6                    # not referenced by any code visible in this notebook

    # --- data loading / logging / reproducibility ---
    num_workers = 4
    print_freq = 100
    seed = 42

    # --- cross-validation ---
    n_fold = 5
    # NOTE(review): main() walks range(n_fold), i.e. 0..n_fold-1, so with this list
    # fold 0 is skipped and 5 is never reached - confirm against the fold ids stored
    # in validation_data.csv.
    trn_fold = [1,2,3,4,5]

In [6]:
# ====================================================
# utils
# ====================================================

def get_score(y_true, y_pred):
    """Score predictions against the ground truth using `accuracy_score`."""
    score = accuracy_score(y_true, y_pred)
    return score

@contextmanager
def timer(name):
    """Context manager that logs when the wrapped block starts and how long it ran.

    The closing message is only written when the block finishes without raising.
    """
    started_at = time.time()
    LOGGER.info(f'[{name}] start')
    yield
    elapsed = time.time() - started_at
    LOGGER.info(f'[{name}] done in {elapsed:.0f} s.')

def init_logger(log_file=None):
    """Return the notebook logger, writing plain messages to the console and to a file.

    Args:
        log_file: path of the log file. When omitted, `train.log` inside
            OUTPUT_DIR is used (resolved at call time).

    Returns:
        The `logging.Logger` registered under this module's name, at INFO level,
        with exactly one stream handler and one file handler attached.
    """
    from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler
    if log_file is None:
        # os.path.join keeps the file inside OUTPUT_DIR whether or not it ends with a slash.
        log_file = os.path.join(OUTPUT_DIR, 'train.log')
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    # getLogger returns the same object on every call, so re-running this cell
    # used to stack handlers and print each message several times.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=log_file)
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

# Shared logger; the functions in this notebook reach it through the global name LOGGER.
LOGGER = init_logger()

def seed_torch(seed=42):
    """Seed Python's `random`, NumPy and PyTorch (CPU and current CUDA device) with `seed`.

    Also exports the seed as PYTHONHASHSEED and switches cuDNN to deterministic mode.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed)
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True

# Apply the seed configured in CFG.
seed_torch(seed=CFG.seed)

In [7]:
# ====================================================
# dataset
# ====================================================

class TrainDataset(Dataset):
    """Dataset that reads images from TRAIN_PATH with OpenCV and pairs them with labels.

    Args:
        df: DataFrame with an 'image_id' column (file names) and a 'label' column.
        transform: optional callable invoked as `transform(image=array)`; its
            `'image'` entry replaces the raw array.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['image_id'].values
        self.labels = df['label'].values
        self.transform = transform
        
    def __len__(self):
        """Number of rows in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return `(image, label)` for row `idx`; the label is a long tensor.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        file_name = self.file_names[idx]
        file_path = os.path.join(TRAIN_PATH, file_name)
        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread signals a missing or undecodable file by returning None;
            # fail here with the path instead of an opaque cvtColor error.
            raise FileNotFoundError(f'Could not read image: {file_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']
        label = torch.tensor(self.labels[idx]).long()
        return image, label

In [8]:
# ====================================================
# transformations
# ====================================================

def get_transforms(*, data):
    """Build the albumentations pipeline for the requested split.

    Args:
        data: 'train' for the augmenting pipeline (random resized crop, transpose,
            flips, shift/scale/rotate) or 'valid' for a plain resize. Both end with
            the same normalisation and tensor conversion.

    Returns:
        A `Compose` object for the requested split.

    Raises:
        ValueError: if `data` is neither 'train' nor 'valid' (previously the
            function silently returned None).
    """
    if data not in ('train', 'valid'):
        raise ValueError(f"data must be 'train' or 'valid', got {data!r}")

    # Shared tail of both pipelines.
    finishing_steps = [
        Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
        ToTensorV2(),
    ]

    if data == 'train':
        return Compose([
            RandomResizedCrop(CFG.size, CFG.size),
            Transpose(p=0.5),
            HorizontalFlip(p=0.5),
            VerticalFlip(p=0.5),
            ShiftScaleRotate(p=0.5),
            *finishing_steps,
        ])

    return Compose([
        Resize(CFG.size, CFG.size),
        *finishing_steps,
    ])

In [9]:
# ====================================================
# model initialization
# ====================================================

class CustomResNext(nn.Module):
    """timm backbone whose `fc` layer is swapped for a CFG.target_size-way linear head.

    Args:
        model_name: architecture name handed to `timm.create_model`.
        pretrained: forwarded to `timm.create_model`.
    """

    def __init__(self, model_name='resnext50_32x4d', pretrained=False):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=pretrained)
        # Keep the backbone's feature width, replace only the output layer.
        backbone.fc = nn.Linear(backbone.fc.in_features, CFG.target_size)
        self.model = backbone

    def forward(self, x):
        """Run `x` through the wrapped backbone and return its output."""
        return self.model(x)

In [10]:
# ====================================================
# helper functions
# ====================================================

class AverageMeter(object):
    """Keeps the most recent value together with a count-weighted running mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero the latest value, the mean, the weighted sum and the count."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record `val` as observed `n` times and refresh the running mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count


def asMinutes(s):
    """Format a duration given in seconds as '<minutes>m <seconds>s' (fractions truncated)."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """Describe elapsed and projected remaining time for a task started at `since`.

    Args:
        since: start timestamp as returned by `time.time()`.
        percent: fraction of the work finished so far; the elapsed time is divided
            by it, so it must be non-zero.

    Returns:
        A string of the form '<elapsed> (remain <remaining>)'.
    """
    elapsed = time.time() - since
    projected_total = elapsed / (percent)
    remaining = projected_total - elapsed
    return '%s (remain %s)' % (asMinutes(elapsed), asMinutes(remaining))


def train_fn(train_loader, model, criterion, optimizer, epoch, scheduler, device):
    """Run one training epoch and return the sample-weighted mean loss.

    Args:
        train_loader: iterable of `(images, labels)` batches that supports `len()`.
        model: network to train; switched to train mode here.
        criterion: loss called as `criterion(predictions, labels)`.
        optimizer: optimizer stepped every CFG.gradient_accumulation_steps batches.
        epoch: zero-based epoch index, used only in the progress line.
        scheduler: accepted for call-site compatibility; not used in this function.
        device: device the batches are moved to.

    Returns:
        The average of the unscaled per-batch losses, weighted by batch size.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    model.train()
    start = end = time.time()
    for step, (images, labels) in enumerate(train_loader):
        data_time.update(time.time() - end)
        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        y_preds = model(images)
        loss = criterion(y_preds, labels)
        # Log the unscaled loss so values stay comparable across accumulation settings.
        losses.update(loss.item(), batch_size)
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        # Back-propagate on every batch. This call used to sit in the `else` branch,
        # so with gradient accumulation enabled no gradients were ever computed.
        loss.backward()
        # Evaluated on every batch because the progress line reports the gradient norm.
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()
        batch_time.update(time.time() - end)
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Grad: {grad_norm:.4f}  '
                  .format(
                   epoch+1, step, len(train_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses,
                   remain=timeSince(start, float(step+1)/len(train_loader)),
                   grad_norm=grad_norm,
                   ))
    return losses.avg

def valid_fn(valid_loader, model, criterion, device):
    """Evaluate `model` on `valid_loader` without tracking gradients.

    Args:
        valid_loader: iterable of `(images, labels)` batches that supports `len()`.
        model: network to evaluate; switched to eval mode here.
        criterion: loss called as `criterion(predictions, labels)`.
        device: device the batches are moved to.

    Returns:
        `(mean_loss, predictions)`: the batch-size-weighted mean loss and a NumPy
        array holding the softmax (over dim 1) of the model outputs for all
        batches, concatenated in loader order.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    model.eval()
    preds = []
    start = end = time.time()
    for step, (images, labels) in enumerate(valid_loader):
        data_time.update(time.time() - end)
        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        # Forward pass and loss both run without autograd bookkeeping.
        with torch.no_grad():
            y_preds = model(images)
            loss = criterion(y_preds, labels)
        losses.update(loss.item(), batch_size)
        preds.append(y_preds.softmax(1).to('cpu').numpy())
        batch_time.update(time.time() - end)
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            print('EVAL: [{0}/{1}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(
                   step, len(valid_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses,
                   remain=timeSince(start, float(step+1)/len(valid_loader)),
                   ))
    predictions = np.concatenate(preds)
    return losses.avg, predictions

In [11]:
# ====================================================
# train loop
# ====================================================

def train_loop(folds, fold):
    """Train and validate the model for one cross-validation fold.

    Rows whose 'fold' column equals `fold` form the validation set; all other rows
    are used for training. After each epoch the validation accuracy is computed and,
    when it improves, the model weights and validation predictions are saved to
    `<OUTPUT_DIR>/<CFG.model_name>_fold<fold>_best.pth`.

    Args:
        folds: DataFrame with at least 'image_id', 'label' and 'fold' columns.
        fold: value of the 'fold' column to hold out for validation.

    Returns:
        The validation rows of this fold with one probability column per class
        (named '0' .. str(CFG.target_size - 1)) and a 'preds' column holding the
        argmax class, both taken from the best-scoring epoch.

    Raises:
        ValueError: if `fold` selects no validation rows or leaves no training rows.
        RuntimeError: if no epoch was run, so there are no predictions to return.
    """
    LOGGER.info(f"========== fold: {fold} training ==========")

    held_out = folds['fold'] == fold
    train_folds = folds[~held_out].reset_index(drop=True)
    valid_folds = folds[held_out].reset_index(drop=True)
    if train_folds.empty or valid_folds.empty:
        # Fail before spending an epoch of training on a fold id that does not exist.
        raise ValueError(
            f"fold {fold!r} gives {len(train_folds)} training and "
            f"{len(valid_folds)} validation rows; check the 'fold' column")

    train_dataset = TrainDataset(train_folds, transform=get_transforms(data='train'))
    valid_dataset = TrainDataset(valid_folds, transform=get_transforms(data='valid'))

    train_loader = DataLoader(train_dataset, batch_size=CFG.batch_size, 
                              shuffle=True, num_workers=CFG.num_workers, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset, batch_size=CFG.batch_size, 
                              shuffle=False, num_workers=CFG.num_workers, pin_memory=True, drop_last=False)
    
    model = CustomResNext(CFG.model_name, pretrained=True)
    model.to(device)

    optimizer = Adam(model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay, amsgrad=False)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=CFG.factor, patience=CFG.patience, verbose=True, eps=CFG.eps)
  
    criterion = nn.CrossEntropyLoss()

    # os.path.join keeps the checkpoint inside OUTPUT_DIR whether or not it ends with a slash.
    checkpoint_path = os.path.join(OUTPUT_DIR, f'{CFG.model_name}_fold{fold}_best.pth')
    valid_labels = valid_folds[CFG.target_col].values

    # Starting below any possible accuracy guarantees the first epoch is recorded.
    best_score = -np.inf
    best_preds = None
    
    for epoch in range(CFG.epochs):
        start_time = time.time()
        avg_loss = train_fn(train_loader, model, criterion, optimizer, epoch, scheduler, device)
        avg_val_loss, preds = valid_fn(valid_loader, model, criterion, device)
        # The plateau scheduler watches the validation loss.
        scheduler.step(avg_val_loss)
        score = get_score(valid_labels, preds.argmax(1))
        elapsed = time.time() - start_time
        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Accuracy: {score}')
        if score > best_score:
            best_score = score
            best_preds = preds
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(), 'preds': preds}, checkpoint_path)

    if best_preds is None:
        raise RuntimeError('no epoch was run (CFG.epochs == 0), so there are no predictions')

    # Use the in-memory copy of the best predictions rather than re-reading the
    # checkpoint, which could be a stale file left over from an earlier run.
    valid_folds[[str(c) for c in range(CFG.target_size)]] = best_preds
    valid_folds['preds'] = best_preds.argmax(1)

    return valid_folds

In [12]:
# ====================================================
# main function
# ====================================================

def main(start_fold=0):
    """Train every configured fold and write the out-of-fold predictions to disk.

    Folds `start_fold .. CFG.n_fold - 1` are visited; only those listed in
    CFG.trn_fold are trained. Per-fold and overall accuracy are logged, and the
    combined out-of-fold frame is saved as `oof_df.csv` inside OUTPUT_DIR.

    Args:
        start_fold: first fold index to visit. Pass a higher value to resume an
            interrupted run without redefining this function.
    """

    def get_result(result_df):
        """Log the accuracy of the 'preds' column against the target column."""
        preds = result_df['preds'].values
        labels = result_df[CFG.target_col].values
        score = get_score(labels, preds)
        LOGGER.info(f'Score: {score:<.5f}')
    
    fold_frames = []
    for fold in range(start_fold, CFG.n_fold):
        if fold in CFG.trn_fold:
            _oof_df = train_loop(folds, fold)
            fold_frames.append(_oof_df)
            LOGGER.info(f"========== fold: {fold} result ==========")
            get_result(_oof_df)

    if not fold_frames:
        # Nothing matched CFG.trn_fold; scoring an empty frame would raise a KeyError.
        LOGGER.warning('No fold was trained; check CFG.trn_fold against the visited fold range.')
        return

    # Concatenate once instead of growing a frame inside the loop.
    oof_df = pd.concat(fold_frames)
    LOGGER.info("========== CV ==========")
    get_result(oof_df)
    # os.path.join keeps the file inside OUTPUT_DIR whether or not it ends with a slash.
    oof_df.to_csv(os.path.join(OUTPUT_DIR, 'oof_df.csv'), index=False)

In [13]:
# Load training data
train = pd.read_csv('./model_input/train.csv')
# Split into folds for cross validation - we used the same split for all the models we trained!
folds = train.merge(
    pd.read_csv("./model_input/validation_data.csv")[["image_id", "fold"]], on="image_id")

if __name__ == '__main__':
    main()

Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/resnext50_32x4d_a1h-0146ab0a.pth" to /home/ec2-user/.cache/torch/hub/checkpoints/resnext50_32x4d_a1h-0146ab0a.pth


Epoch: [1][0/1086] Data 0.804 (0.804) Elapsed 1m 8s (remain 1232m 16s) Loss: 1.5777(1.5777) Grad: 6.5746  
Epoch: [1][100/1086] Data 0.000 (0.008) Elapsed 3m 1s (remain 29m 33s) Loss: 0.9412(1.1570) Grad: 10.4587  
Epoch: [1][200/1086] Data 0.000 (0.004) Elapsed 4m 59s (remain 21m 59s) Loss: 0.7454(1.0366) Grad: 4.4660  
Epoch: [1][300/1086] Data 0.000 (0.003) Elapsed 6m 58s (remain 18m 10s) Loss: 0.7721(0.9765) Grad: 5.0295  
Epoch: [1][400/1086] Data 0.000 (0.002) Elapsed 8m 56s (remain 15m 16s) Loss: 0.7517(0.9108) Grad: 5.2932  
Epoch: [1][500/1086] Data 0.000 (0.002) Elapsed 10m 54s (remain 12m 44s) Loss: 0.9873(0.8592) Grad: 10.3705  
Epoch: [1][600/1086] Data 0.000 (0.002) Elapsed 12m 53s (remain 10m 24s) Loss: 0.5184(0.8220) Grad: 6.1214  
Epoch: [1][700/1086] Data 0.000 (0.001) Elapsed 14m 50s (remain 8m 9s) Loss: 0.8162(0.7932) Grad: 8.5015  
Epoch: [1][800/1086] Data 0.000 (0.001) Elapsed 16m 42s (remain 5m 56s) Loss: 0.5200(0.7667) Grad: 8.2510  
Epoch: [1][900/1086] Data 0

Epoch 1 - avg_train_loss: 0.7070  avg_val_loss: 0.4753  time: 1408s
Epoch 1 - Accuracy: 0.8428001993024414
Epoch 1 - Save Best Score: 0.8428 Model


EVAL: [250/251] Data 0.000 (0.002) Elapsed 1m 15s (remain 0m 0s) Loss: 0.3287(0.4753) 
Epoch: [2][0/1086] Data 0.491 (0.491) Elapsed 0m 1s (remain 30m 6s) Loss: 0.8758(0.8758) Grad: 6.7841  
Epoch: [2][100/1086] Data 0.000 (0.005) Elapsed 2m 0s (remain 19m 31s) Loss: 0.6408(0.4893) Grad: 5.8968  
Epoch: [2][200/1086] Data 0.000 (0.003) Elapsed 3m 58s (remain 17m 29s) Loss: 0.4515(0.4854) Grad: 6.5698  
Epoch: [2][300/1086] Data 0.000 (0.002) Elapsed 5m 56s (remain 15m 29s) Loss: 0.2862(0.4828) Grad: 4.7107  
Epoch: [2][400/1086] Data 0.000 (0.002) Elapsed 7m 54s (remain 13m 30s) Loss: 0.6396(0.4874) Grad: 4.9746  
Epoch: [2][500/1086] Data 0.000 (0.001) Elapsed 9m 52s (remain 11m 31s) Loss: 0.2734(0.4914) Grad: 5.0569  
Epoch: [2][600/1086] Data 0.000 (0.001) Elapsed 11m 50s (remain 9m 33s) Loss: 0.2562(0.4940) Grad: 4.2103  
Epoch: [2][700/1086] Data 0.000 (0.001) Elapsed 13m 48s (remain 7m 35s) Loss: 0.5864(0.4937) Grad: 4.1037  
Epoch: [2][800/1086] Data 0.000 (0.001) Elapsed 15m 47

Epoch 2 - avg_train_loss: 0.4866  avg_val_loss: 0.3913  time: 1361s
Epoch 2 - Accuracy: 0.874439461883408
Epoch 2 - Save Best Score: 0.8744 Model


EVAL: [250/251] Data 0.000 (0.002) Elapsed 1m 15s (remain 0m 0s) Loss: 0.4172(0.3913) 
Epoch: [3][0/1086] Data 0.490 (0.490) Elapsed 0m 1s (remain 30m 6s) Loss: 0.3003(0.3003) Grad: 4.8208  
Epoch: [3][100/1086] Data 0.000 (0.005) Elapsed 2m 0s (remain 19m 31s) Loss: 0.7379(0.4317) Grad: 5.4568  
Epoch: [3][200/1086] Data 0.000 (0.003) Elapsed 3m 58s (remain 17m 31s) Loss: 0.7531(0.4468) Grad: 5.2964  
Epoch: [3][300/1086] Data 0.000 (0.002) Elapsed 5m 57s (remain 15m 32s) Loss: 0.3320(0.4503) Grad: 3.4576  
Epoch: [3][400/1086] Data 0.000 (0.002) Elapsed 7m 56s (remain 13m 33s) Loss: 0.2020(0.4543) Grad: 3.3729  
Epoch: [3][500/1086] Data 0.000 (0.001) Elapsed 9m 54s (remain 11m 34s) Loss: 0.3352(0.4575) Grad: 4.2524  
Epoch: [3][600/1086] Data 0.000 (0.001) Elapsed 11m 53s (remain 9m 35s) Loss: 0.3844(0.4562) Grad: 3.5613  
Epoch: [3][700/1086] Data 0.000 (0.001) Elapsed 13m 46s (remain 7m 33s) Loss: 0.4877(0.4564) Grad: 4.4977  
Epoch: [3][800/1086] Data 0.000 (0.001) Elapsed 15m 37

Epoch 3 - avg_train_loss: 0.4527  avg_val_loss: 0.4122  time: 1328s
Epoch 3 - Accuracy: 0.8607374190333832


EVAL: [250/251] Data 0.000 (0.002) Elapsed 1m 12s (remain 0m 0s) Loss: 0.4442(0.4122) 
Epoch: [4][0/1086] Data 0.434 (0.434) Elapsed 0m 1s (remain 28m 41s) Loss: 0.6949(0.6949) Grad: 5.0201  
Epoch: [4][100/1086] Data 0.000 (0.005) Elapsed 1m 59s (remain 19m 25s) Loss: 0.9912(0.4199) Grad: 7.6145  
Epoch: [4][200/1086] Data 0.000 (0.002) Elapsed 3m 52s (remain 17m 3s) Loss: 0.5452(0.4233) Grad: 4.6620  
Epoch: [4][300/1086] Data 0.000 (0.002) Elapsed 5m 44s (remain 14m 57s) Loss: 0.6142(0.4314) Grad: 6.5462  
Epoch: [4][400/1086] Data 0.000 (0.001) Elapsed 7m 38s (remain 13m 3s) Loss: 0.4013(0.4303) Grad: 3.5622  
Epoch: [4][500/1086] Data 0.000 (0.001) Elapsed 9m 36s (remain 11m 13s) Loss: 0.5161(0.4301) Grad: 4.2810  
Epoch: [4][600/1086] Data 0.000 (0.001) Elapsed 11m 35s (remain 9m 21s) Loss: 0.4330(0.4269) Grad: 3.4942  
Epoch: [4][700/1086] Data 0.000 (0.001) Elapsed 13m 34s (remain 7m 27s) Loss: 0.2945(0.4272) Grad: 5.2816  
Epoch: [4][800/1086] Data 0.000 (0.001) Elapsed 15m 32

Epoch 4 - avg_train_loss: 0.4254  avg_val_loss: 0.3725  time: 1346s
Epoch 4 - Accuracy: 0.882660687593423
Epoch 4 - Save Best Score: 0.8827 Model


EVAL: [250/251] Data 0.000 (0.002) Elapsed 1m 15s (remain 0m 0s) Loss: 0.3711(0.3725) 
Epoch: [5][0/1086] Data 0.459 (0.459) Elapsed 0m 1s (remain 29m 31s) Loss: 0.5600(0.5600) Grad: 6.5942  
Epoch: [5][100/1086] Data 0.000 (0.005) Elapsed 1m 56s (remain 18m 52s) Loss: 0.3573(0.4189) Grad: 6.3909  
Epoch: [5][200/1086] Data 0.000 (0.003) Elapsed 3m 47s (remain 16m 42s) Loss: 0.3238(0.4236) Grad: 5.0967  
Epoch: [5][300/1086] Data 0.000 (0.002) Elapsed 5m 40s (remain 14m 49s) Loss: 0.4561(0.4155) Grad: 6.3429  
Epoch: [5][400/1086] Data 0.000 (0.001) Elapsed 7m 38s (remain 13m 2s) Loss: 0.3621(0.4118) Grad: 2.7353  
Epoch: [5][500/1086] Data 0.000 (0.001) Elapsed 9m 36s (remain 11m 12s) Loss: 0.0983(0.4107) Grad: 1.8270  
Epoch: [5][600/1086] Data 0.000 (0.001) Elapsed 11m 34s (remain 9m 20s) Loss: 0.4304(0.4175) Grad: 3.7187  
Epoch: [5][700/1086] Data 0.000 (0.001) Elapsed 13m 33s (remain 7m 26s) Loss: 0.1383(0.4115) Grad: 1.5770  
Epoch: [5][800/1086] Data 0.000 (0.001) Elapsed 15m 3

Epoch 5 - avg_train_loss: 0.4082  avg_val_loss: 0.3883  time: 1330s
Epoch 5 - Accuracy: 0.8771798704534131
Score: 0.88266


EVAL: [250/251] Data 0.000 (0.002) Elapsed 1m 11s (remain 0m 0s) Loss: 0.4179(0.3883) 
Epoch: [1][0/1086] Data 0.563 (0.563) Elapsed 0m 1s (remain 30m 35s) Loss: 1.6168(1.6168) Grad: 14.2316  
Epoch: [1][100/1086] Data 0.000 (0.006) Elapsed 1m 57s (remain 19m 5s) Loss: 0.8753(1.1693) Grad: 6.4066  


KeyboardInterrupt: 

In [15]:
# ====================================================
# main function
# ====================================================

def main(start_fold=2):
    """Train the selected folds, log their scores, and save out-of-fold predictions.

    For every fold index in ``range(start_fold, CFG.n_fold)`` that is also
    listed in ``CFG.trn_fold``, this runs ``train_loop(folds, fold)``, logs the
    score of that fold's result, and finally logs a combined score and writes
    the combined frame to ``OUTPUT_DIR + 'oof_df.csv'``.

    Args:
        start_fold: First fold index to consider. Defaults to 2, the value that
            used to be hard-coded in the loop, so a bare ``main()`` call behaves
            exactly as before. Pass 0 to consider all fold indices.

    Note:
        The "CV" score and ``oof_df.csv`` cover only the folds trained during
        this call; folds below ``start_fold`` (or absent from ``CFG.trn_fold``)
        are not part of either.
    """

    def get_result(result_df):
        """Log the score of the ``'preds'`` column against ``CFG.target_col``."""
        preds = result_df['preds'].values
        labels = result_df[CFG.target_col].values
        score = get_score(labels, preds)
        LOGGER.info(f'Score: {score:<.5f}')

    # Collect per-fold frames and concatenate once at the end instead of
    # re-copying a growing DataFrame on every iteration.
    fold_frames = []
    for fold in range(start_fold, CFG.n_fold):
        if fold not in CFG.trn_fold:
            continue
        # presumably a DataFrame of out-of-fold predictions for this fold
        # (it is concatenated and indexed by column below) - confirm in train_loop
        _oof_df = train_loop(folds, fold)
        fold_frames.append(_oof_df)
        LOGGER.info(f"========== fold: {fold} result ==========")
        get_result(_oof_df)

    if not fold_frames:
        # Nothing was trained: scoring an empty frame would only fail with an
        # opaque KeyError on 'preds', so report the situation and stop here.
        LOGGER.info(
            f"No folds trained (start_fold={start_fold}, n_fold={CFG.n_fold}, "
            f"trn_fold={CFG.trn_fold}); skipping CV score and oof_df.csv"
        )
        return

    oof_df = pd.concat(fold_frames)
    LOGGER.info("========== CV ==========")
    get_result(oof_df)
    oof_df.to_csv(OUTPUT_DIR+'oof_df.csv', index=False)

In [None]:
# Read the training table
train = pd.read_csv('./model_input/train.csv')

# Attach the cross-validation fold of each image - the same split was used for
# every model we trained. pd.merge defaults to an inner join on "image_id", so
# only images present in both files are kept.
folds = pd.merge(
    train,
    pd.read_csv("./model_input/validation_data.csv").loc[:, ["image_id", "fold"]],
    on="image_id",
)

# Start training when this cell is executed as the top-level module
if __name__ == '__main__':
    main()



Epoch: [1][0/1086] Data 0.630 (0.630) Elapsed 0m 1s (remain 31m 21s) Loss: 1.5923(1.5923) Grad: 12.9974  
Epoch: [1][100/1086] Data 0.000 (0.006) Elapsed 1m 52s (remain 18m 19s) Loss: 1.0544(1.1423) Grad: 4.8015  
Epoch: [1][200/1086] Data 0.000 (0.003) Elapsed 3m 44s (remain 16m 26s) Loss: 0.6551(1.0036) Grad: 3.8610  
Epoch: [1][300/1086] Data 0.000 (0.002) Elapsed 5m 35s (remain 14m 34s) Loss: 0.6852(0.9355) Grad: 5.0368  
Epoch: [1][400/1086] Data 0.000 (0.002) Elapsed 7m 27s (remain 12m 44s) Loss: 0.9020(0.8854) Grad: 9.9956  
Epoch: [1][500/1086] Data 0.000 (0.002) Elapsed 9m 23s (remain 10m 57s) Loss: 0.9796(0.8519) Grad: 15.0070  
Epoch: [1][600/1086] Data 0.000 (0.001) Elapsed 11m 21s (remain 9m 10s) Loss: 0.4605(0.8178) Grad: 4.2032  
Epoch: [1][700/1086] Data 0.000 (0.001) Elapsed 13m 16s (remain 7m 17s) Loss: 0.5054(0.7836) Grad: 5.3356  
Epoch: [1][800/1086] Data 0.000 (0.001) Elapsed 15m 7s (remain 5m 22s) Loss: 0.6997(0.7555) Grad: 10.5782  
Epoch: [1][900/1086] Data 0.0

Epoch 1 - avg_train_loss: 0.6981  avg_val_loss: 0.4893  time: 1297s
Epoch 1 - Accuracy: 0.8293472845042352
Epoch 1 - Save Best Score: 0.8293 Model


EVAL: [250/251] Data 0.000 (0.002) Elapsed 1m 12s (remain 0m 0s) Loss: 0.5870(0.4893) 
Epoch: [2][0/1086] Data 0.538 (0.538) Elapsed 0m 1s (remain 30m 12s) Loss: 0.5739(0.5739) Grad: 7.1899  
Epoch: [2][100/1086] Data 0.000 (0.006) Elapsed 1m 58s (remain 19m 14s) Loss: 0.3229(0.5365) Grad: 4.3420  
Epoch: [2][200/1086] Data 0.000 (0.003) Elapsed 3m 57s (remain 17m 23s) Loss: 0.9912(0.5427) Grad: 11.3664  
Epoch: [2][300/1086] Data 0.000 (0.002) Elapsed 5m 49s (remain 15m 11s) Loss: 0.5468(0.5212) Grad: 7.5304  
Epoch: [2][400/1086] Data 0.000 (0.002) Elapsed 7m 41s (remain 13m 8s) Loss: 0.5554(0.5147) Grad: 5.4555  
Epoch: [2][500/1086] Data 0.000 (0.001) Elapsed 9m 36s (remain 11m 13s) Loss: 0.3149(0.5142) Grad: 4.2823  
Epoch: [2][600/1086] Data 0.000 (0.001) Elapsed 11m 34s (remain 9m 20s) Loss: 0.6114(0.5170) Grad: 7.9084  
Epoch: [2][700/1086] Data 0.000 (0.001) Elapsed 13m 32s (remain 7m 26s) Loss: 0.5420(0.5140) Grad: 5.5585  
Epoch: [2][800/1086] Data 0.000 (0.001) Elapsed 15m 

Epoch 2 - avg_train_loss: 0.5008  avg_val_loss: 0.4295  time: 1329s
Epoch 2 - Accuracy: 0.8515196811160937
Epoch 2 - Save Best Score: 0.8515 Model


EVAL: [250/251] Data 0.000 (0.002) Elapsed 1m 15s (remain 0m 0s) Loss: 0.3661(0.4295) 
Epoch: [3][0/1086] Data 0.538 (0.538) Elapsed 0m 1s (remain 30m 54s) Loss: 0.4875(0.4875) Grad: 5.5275  
Epoch: [3][100/1086] Data 0.000 (0.006) Elapsed 1m 59s (remain 19m 27s) Loss: 0.6809(0.4230) Grad: 6.9744  
Epoch: [3][200/1086] Data 0.000 (0.003) Elapsed 3m 54s (remain 17m 13s) Loss: 0.5473(0.4558) Grad: 6.0309  
Epoch: [3][300/1086] Data 0.000 (0.002) Elapsed 5m 46s (remain 15m 2s) Loss: 0.4417(0.4523) Grad: 3.4208  
Epoch: [3][400/1086] Data 0.000 (0.002) Elapsed 7m 39s (remain 13m 4s) Loss: 0.4337(0.4589) Grad: 3.8971  
Epoch: [3][500/1086] Data 0.000 (0.001) Elapsed 9m 37s (remain 11m 13s) Loss: 0.4857(0.4665) Grad: 5.1843  
Epoch: [3][600/1086] Data 0.000 (0.001) Elapsed 11m 35s (remain 9m 21s) Loss: 0.5634(0.4596) Grad: 5.8224  
Epoch: [3][700/1086] Data 0.000 (0.001) Elapsed 13m 33s (remain 7m 27s) Loss: 0.3460(0.4588) Grad: 3.9686  
Epoch: [3][800/1086] Data 0.000 (0.001) Elapsed 15m 27

Epoch 3 - avg_train_loss: 0.4523  avg_val_loss: 0.4060  time: 1319s
Epoch 3 - Accuracy: 0.8644743398106627
Epoch 3 - Save Best Score: 0.8645 Model


EVAL: [250/251] Data 0.000 (0.002) Elapsed 1m 13s (remain 0m 0s) Loss: 0.3251(0.4060) 
Epoch: [4][0/1086] Data 0.561 (0.561) Elapsed 0m 1s (remain 31m 8s) Loss: 0.1438(0.1438) Grad: 2.2876  
Epoch: [4][100/1086] Data 0.000 (0.006) Elapsed 1m 59s (remain 19m 29s) Loss: 0.2888(0.4618) Grad: 3.3707  
Epoch: [4][200/1086] Data 0.000 (0.003) Elapsed 3m 58s (remain 17m 29s) Loss: 0.1197(0.4251) Grad: 3.4729  
Epoch: [4][300/1086] Data 0.000 (0.002) Elapsed 5m 56s (remain 15m 30s) Loss: 0.2615(0.4395) Grad: 4.0587  
Epoch: [4][400/1086] Data 0.000 (0.002) Elapsed 7m 55s (remain 13m 31s) Loss: 0.5511(0.4332) Grad: 6.1129  
Epoch: [4][500/1086] Data 0.000 (0.001) Elapsed 9m 47s (remain 11m 26s) Loss: 0.5073(0.4314) Grad: 5.3583  
Epoch: [4][600/1086] Data 0.000 (0.001) Elapsed 11m 39s (remain 9m 24s) Loss: 0.7720(0.4258) Grad: 7.2849  
Epoch: [4][700/1086] Data 0.000 (0.001) Elapsed 13m 32s (remain 7m 26s) Loss: 0.5841(0.4239) Grad: 4.5610  
Epoch: [4][800/1086] Data 0.000 (0.001) Elapsed 15m 2

Epoch 4 - avg_train_loss: 0.4289  avg_val_loss: 0.3779  time: 1321s
Epoch 4 - Accuracy: 0.8709516691579472
Epoch 4 - Save Best Score: 0.8710 Model


EVAL: [250/251] Data 0.000 (0.002) Elapsed 1m 11s (remain 0m 0s) Loss: 0.3266(0.3779) 
Epoch: [5][0/1086] Data 0.499 (0.499) Elapsed 0m 1s (remain 29m 3s) Loss: 0.2145(0.2145) Grad: 2.5577  
Epoch: [5][100/1086] Data 0.000 (0.005) Elapsed 1m 53s (remain 18m 31s) Loss: 0.4265(0.4319) Grad: 6.0398  
Epoch: [5][200/1086] Data 0.000 (0.003) Elapsed 3m 50s (remain 16m 53s) Loss: 0.3690(0.4280) Grad: 4.3648  
Epoch: [5][300/1086] Data 0.000 (0.002) Elapsed 5m 48s (remain 15m 9s) Loss: 0.3359(0.4187) Grad: 4.5717  
Epoch: [5][400/1086] Data 0.000 (0.002) Elapsed 7m 46s (remain 13m 17s) Loss: 0.1199(0.4147) Grad: 1.7226  
Epoch: [5][500/1086] Data 0.000 (0.001) Elapsed 9m 38s (remain 11m 15s) Loss: 0.4136(0.4136) Grad: 5.0708  
Epoch: [5][600/1086] Data 0.000 (0.001) Elapsed 11m 30s (remain 9m 17s) Loss: 0.3261(0.4069) Grad: 4.2578  
Epoch: [5][700/1086] Data 0.000 (0.001) Elapsed 13m 25s (remain 7m 22s) Loss: 0.5109(0.4078) Grad: 4.8586  
Epoch: [5][800/1086] Data 0.000 (0.001) Elapsed 15m 23

Epoch 5 - avg_train_loss: 0.4078  avg_val_loss: 0.3818  time: 1323s
Epoch 5 - Accuracy: 0.8689586447433981
Score: 0.87095


EVAL: [250/251] Data 0.000 (0.002) Elapsed 1m 11s (remain 0m 0s) Loss: 0.2665(0.3818) 
Epoch: [1][0/1087] Data 0.576 (0.576) Elapsed 0m 1s (remain 30m 46s) Loss: 1.6216(1.6216) Grad: 6.7746  
Epoch: [1][100/1087] Data 0.000 (0.006) Elapsed 1m 57s (remain 19m 2s) Loss: 0.8850(1.1823) Grad: 7.2710  
Epoch: [1][200/1087] Data 0.000 (0.003) Elapsed 3m 55s (remain 17m 17s) Loss: 1.0218(1.0325) Grad: 8.0284  
Epoch: [1][300/1087] Data 0.000 (0.002) Elapsed 5m 48s (remain 15m 9s) Loss: 1.4210(0.9555) Grad: 19.9200  
Epoch: [1][400/1087] Data 0.000 (0.002) Elapsed 7m 39s (remain 13m 6s) Loss: 0.7012(0.8996) Grad: 8.6154  
Epoch: [1][500/1087] Data 0.000 (0.001) Elapsed 9m 31s (remain 11m 8s) Loss: 0.9753(0.8536) Grad: 7.2892  
Epoch: [1][600/1087] Data 0.000 (0.001) Elapsed 11m 23s (remain 9m 12s) Loss: 0.2570(0.8137) Grad: 3.8053  
Epoch: [1][700/1087] Data 0.000 (0.001) Elapsed 13m 17s (remain 7m 18s) Loss: 0.3630(0.7781) Grad: 5.6457  
Epoch: [1][800/1087] Data 0.000 (0.001) Elapsed 15m 15s

Epoch 1 - avg_train_loss: 0.7006  avg_val_loss: 0.4600  time: 1329s
Epoch 1 - Accuracy: 0.842368223832126
Epoch 1 - Save Best Score: 0.8424 Model


EVAL: [250/251] Data 0.000 (0.002) Elapsed 1m 14s (remain 0m 0s) Loss: 0.5071(0.4600) 
Epoch: [2][0/1087] Data 0.597 (0.597) Elapsed 0m 1s (remain 31m 58s) Loss: 0.5685(0.5685) Grad: 3.8201  
Epoch: [2][100/1087] Data 0.000 (0.006) Elapsed 1m 58s (remain 19m 12s) Loss: 0.6191(0.5203) Grad: 7.4935  
Epoch: [2][200/1087] Data 0.000 (0.003) Elapsed 3m 49s (remain 16m 51s) Loss: 0.5671(0.5127) Grad: 7.1601  
Epoch: [2][300/1087] Data 0.000 (0.002) Elapsed 5m 42s (remain 14m 53s) Loss: 0.3186(0.5041) Grad: 4.1751  
Epoch: [2][400/1087] Data 0.000 (0.002) Elapsed 7m 39s (remain 13m 5s) Loss: 0.4515(0.5073) Grad: 5.0024  
Epoch: [2][500/1087] Data 0.000 (0.001) Elapsed 9m 37s (remain 11m 15s) Loss: 0.3650(0.5122) Grad: 4.9895  
Epoch: [2][600/1087] Data 0.000 (0.001) Elapsed 11m 36s (remain 9m 22s) Loss: 0.5439(0.5105) Grad: 5.8723  
Epoch: [2][700/1087] Data 0.000 (0.001) Elapsed 13m 34s (remain 7m 28s) Loss: 0.8773(0.5096) Grad: 8.3447  
Epoch: [2][800/1087] Data 0.000 (0.001) Elapsed 15m 3

Epoch 2 - avg_train_loss: 0.4954  avg_val_loss: 0.4028  time: 1331s
Epoch 2 - Accuracy: 0.8656007994004496
Epoch 2 - Save Best Score: 0.8656 Model


EVAL: [250/251] Data 0.000 (0.002) Elapsed 1m 14s (remain 0m 0s) Loss: 0.8962(0.4028) 
Epoch: [3][0/1087] Data 0.531 (0.531) Elapsed 0m 1s (remain 30m 42s) Loss: 0.3263(0.3263) Grad: 6.1806  
Epoch: [3][100/1087] Data 0.000 (0.006) Elapsed 1m 59s (remain 19m 31s) Loss: 0.4814(0.4606) Grad: 5.7346  
Epoch: [3][200/1087] Data 0.000 (0.003) Elapsed 3m 58s (remain 17m 30s) Loss: 0.5821(0.4516) Grad: 6.0021  
Epoch: [3][300/1087] Data 0.000 (0.002) Elapsed 5m 56s (remain 15m 31s) Loss: 0.3966(0.4512) Grad: 5.0365  
Epoch: [3][400/1087] Data 0.000 (0.002) Elapsed 7m 54s (remain 13m 32s) Loss: 0.4084(0.4446) Grad: 5.0734  
Epoch: [3][500/1087] Data 0.000 (0.001) Elapsed 9m 48s (remain 11m 28s) Loss: 0.1181(0.4425) Grad: 1.8757  
Epoch: [3][600/1087] Data 0.000 (0.001) Elapsed 11m 40s (remain 9m 26s) Loss: 0.5785(0.4429) Grad: 6.4335  
Epoch: [3][700/1087] Data 0.000 (0.001) Elapsed 13m 33s (remain 7m 28s) Loss: 0.4731(0.4495) Grad: 4.0985  
Epoch: [3][800/1087] Data 0.000 (0.001) Elapsed 15m 

Epoch 3 - avg_train_loss: 0.4508  avg_val_loss: 0.3830  time: 1331s
Epoch 3 - Accuracy: 0.8790906819885086
Epoch 3 - Save Best Score: 0.8791 Model


EVAL: [250/251] Data 0.000 (0.002) Elapsed 1m 10s (remain 0m 0s) Loss: 1.4396(0.3830) 
Epoch: [4][0/1087] Data 0.542 (0.542) Elapsed 0m 1s (remain 29m 46s) Loss: 0.2961(0.2961) Grad: 3.7309  
Epoch: [4][100/1087] Data 0.000 (0.006) Elapsed 1m 52s (remain 18m 21s) Loss: 0.2233(0.4235) Grad: 3.2926  
Epoch: [4][200/1087] Data 0.000 (0.003) Elapsed 3m 46s (remain 16m 37s) Loss: 0.1895(0.4221) Grad: 2.1064  
Epoch: [4][300/1087] Data 0.000 (0.002) Elapsed 5m 43s (remain 14m 57s) Loss: 0.6817(0.4291) Grad: 7.3252  
Epoch: [4][400/1087] Data 0.000 (0.002) Elapsed 7m 42s (remain 13m 10s) Loss: 0.3905(0.4260) Grad: 5.9250  
Epoch: [4][500/1087] Data 0.000 (0.001) Elapsed 9m 40s (remain 11m 18s) Loss: 0.2465(0.4200) Grad: 3.4553  
Epoch: [4][600/1087] Data 0.000 (0.001) Elapsed 11m 39s (remain 9m 25s) Loss: 0.1118(0.4218) Grad: 2.0310  
Epoch: [4][700/1087] Data 0.000 (0.001) Elapsed 13m 37s (remain 7m 30s) Loss: 0.8139(0.4233) Grad: 7.7594  
Epoch: [4][800/1087] Data 0.000 (0.001) Elapsed 15m 

Epoch 4 - avg_train_loss: 0.4199  avg_val_loss: 0.3750  time: 1350s
Epoch 4 - Accuracy: 0.8780914314264302


EVAL: [250/251] Data 0.000 (0.002) Elapsed 1m 15s (remain 0m 0s) Loss: 1.7017(0.3750) 
Epoch: [5][0/1087] Data 0.563 (0.563) Elapsed 0m 1s (remain 31m 27s) Loss: 0.3527(0.3527) Grad: 3.2000  
Epoch: [5][100/1087] Data 0.000 (0.006) Elapsed 1m 59s (remain 19m 31s) Loss: 0.3104(0.4646) Grad: 4.8679  
Epoch: [5][200/1087] Data 0.000 (0.003) Elapsed 3m 58s (remain 17m 29s) Loss: 0.4660(0.4325) Grad: 3.3968  
Epoch: [5][300/1087] Data 0.000 (0.002) Elapsed 5m 55s (remain 15m 28s) Loss: 0.1625(0.4304) Grad: 3.0178  
Epoch: [5][400/1087] Data 0.000 (0.002) Elapsed 7m 47s (remain 13m 19s) Loss: 0.3384(0.4296) Grad: 3.8619  
Epoch: [5][500/1087] Data 0.000 (0.001) Elapsed 9m 39s (remain 11m 17s) Loss: 0.4630(0.4279) Grad: 5.9403  
Epoch: [5][600/1087] Data 0.000 (0.001) Elapsed 11m 34s (remain 9m 21s) Loss: 0.5886(0.4193) Grad: 4.4754  
Epoch: [5][700/1087] Data 0.000 (0.001) Elapsed 13m 33s (remain 7m 27s) Loss: 0.3647(0.4193) Grad: 4.4584  
Epoch: [5][800/1087] Data 0.000 (0.001) Elapsed 15m 

Epoch 5 - avg_train_loss: 0.4086  avg_val_loss: 0.3847  time: 1327s
Epoch 5 - Accuracy: 0.874344241818636
Score: 0.87909


EVAL: [250/251] Data 0.000 (0.002) Elapsed 1m 11s (remain 0m 0s) Loss: 1.1576(0.3847) 
Epoch: [1][0/1086] Data 0.601 (0.601) Elapsed 0m 1s (remain 31m 20s) Loss: 1.5949(1.5949) Grad: 14.6246  
Epoch: [1][100/1086] Data 0.000 (0.006) Elapsed 1m 57s (remain 19m 4s) Loss: 0.6328(1.1137) Grad: 4.0355  
Epoch: [1][200/1086] Data 0.000 (0.003) Elapsed 3m 55s (remain 17m 18s) Loss: 0.6137(0.9797) Grad: 9.8800  
Epoch: [1][300/1086] Data 0.000 (0.002) Elapsed 5m 54s (remain 15m 23s) Loss: 0.9533(0.9253) Grad: 4.6046  
Epoch: [1][400/1086] Data 0.000 (0.002) Elapsed 7m 46s (remain 13m 16s) Loss: 0.7164(0.8696) Grad: 7.7127  
Epoch: [1][500/1086] Data 0.000 (0.001) Elapsed 9m 38s (remain 11m 15s) Loss: 0.5744(0.8348) Grad: 7.1837  
Epoch: [1][600/1086] Data 0.000 (0.001) Elapsed 11m 32s (remain 9m 18s) Loss: 0.8646(0.7971) Grad: 16.3433  
Epoch: [1][700/1086] Data 0.000 (0.001) Elapsed 13m 30s (remain 7m 24s) Loss: 0.5005(0.7653) Grad: 5.8938  
Epoch: [1][800/1086] Data 0.000 (0.001) Elapsed 15m

Epoch 1 - avg_train_loss: 0.6934  avg_val_loss: 0.4150  time: 1343s
Epoch 1 - Accuracy: 0.8560039860488291
Epoch 1 - Save Best Score: 0.8560 Model


EVAL: [250/251] Data 0.000 (0.002) Elapsed 1m 15s (remain 0m 0s) Loss: 0.0987(0.4150) 
Epoch: [2][0/1086] Data 0.561 (0.561) Elapsed 0m 1s (remain 31m 18s) Loss: 0.3545(0.3545) Grad: 4.1470  
Epoch: [2][100/1086] Data 0.000 (0.006) Elapsed 2m 0s (remain 19m 31s) Loss: 0.3135(0.5287) Grad: 3.7046  
Epoch: [2][200/1086] Data 0.000 (0.003) Elapsed 3m 58s (remain 17m 29s) Loss: 0.2336(0.5177) Grad: 2.6522  
Epoch: [2][300/1086] Data 0.000 (0.002) Elapsed 5m 56s (remain 15m 30s) Loss: 0.5270(0.5195) Grad: 7.5564  
Epoch: [2][400/1086] Data 0.000 (0.002) Elapsed 7m 55s (remain 13m 32s) Loss: 0.2338(0.5169) Grad: 4.1509  
Epoch: [2][500/1086] Data 0.000 (0.001) Elapsed 9m 54s (remain 11m 33s) Loss: 0.5108(0.5167) Grad: 7.2460  
Epoch: [2][600/1086] Data 0.000 (0.001) Elapsed 11m 52s (remain 9m 35s) Loss: 0.7141(0.5063) Grad: 10.3773  
Epoch: [2][700/1086] Data 0.000 (0.001) Elapsed 13m 50s (remain 7m 36s) Loss: 0.2545(0.5015) Grad: 4.6719  
Epoch: [2][800/1086] Data 0.000 (0.001) Elapsed 15m 