<a href="https://colab.research.google.com/github/utsavnandi/hackerearth-dance/blob/master/dance_hackerearth.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Installs

In [None]:
# Install albumentations and pytorch-image-models (imported below as `timm`)
# directly from their GitHub repositories.
# NOTE(review): neither install is pinned to a tag or commit, so re-running this
# cell at a later date may pull different library versions.
!pip install -U git+https://github.com/albu/albumentations -q
!pip install -U git+https://github.com/rwightman/pytorch-image-models -q

In [None]:
# The data files must be unzipped into the ./data/ folder.
# !unzip drive/My\ Drive/dance.zip -d ./data/

## Setup

In [None]:
import os
import gc
import time
import datetime
import random
from getpass import getpass
import numpy as np
import cv2
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
from google.colab import auth
from google.cloud import storage

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import f1_score

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torch.cuda.amp import GradScaler
import torchvision

import timm

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

#import neptune

def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices) and configures cuDNN for deterministic behaviour.

    Args:
        seed: Integer seed applied to every generator.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the currently selected one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune its algorithm choice, which can vary
    # between runs and works against the determinism requested just above.
    torch.backends.cudnn.benchmark = False

seed = 43
seed_everything(seed)


  import pandas.util.testing as tm


In [None]:
# Root folder holding train.csv, test.csv and the train/ and test/ image folders.
# The trailing slash matters: paths are built from it by plain string concatenation.
DATA_DIR = './data/dataset/'

In [None]:
# Load the training table (image file name + dance-form label) and the test table
# (its 'target' column is only filled in later, by the inference cells).
df_train = pd.read_csv(DATA_DIR+'train.csv')
df_test = pd.read_csv(DATA_DIR+'test.csv')
df_train.head()

Unnamed: 0,Image,target
0,96.jpg,manipuri
1,163.jpg,bharatanatyam
2,450.jpg,odissi
3,219.jpg,kathakali
4,455.jpg,odissi


In [None]:
# Encode the dance-form names as integer class ids.
# The original names are kept in 'target_name' so that this cell is idempotent:
# re-running it always fits the encoder on the names, never on ids produced by a
# previous run (which would make le.inverse_transform return integers at inference).
le = LabelEncoder()
if 'target_name' not in df_train.columns:
    df_train['target_name'] = df_train['target']
df_train['target'] = le.fit_transform(df_train['target_name'])

In [None]:
# Class balance check: number of training rows per encoded class id.
df_train['target'].value_counts()

5    50
6    49
2    47
0    47
3    46
7    45
1    44
4    36
Name: target, dtype: int64

## Dataset

In [None]:
class DanceDataset(Dataset):
    """Image dataset backed by a DataFrame of image file names.

    Images are read from ``DATA_DIR + 'test/'`` when ``isEval`` is True and
    from ``DATA_DIR + 'train/'`` otherwise.

    Args:
        df: DataFrame with an ``Image`` column of file names and, when
            ``isEval`` is False, a ``target`` column of labels.
        isEval: If True, items are images only; if False, items are
            ``(image, target)`` pairs.
        transform: Optional callable invoked as ``transform(image=image)``
            whose result is indexed with ``'image'``. When omitted the RGB
            array is returned as float32.
    """

    def __init__(self, df, isEval=True, transform=None):
        super().__init__()
        self.image_id = df['Image'].values
        self.transform = transform
        self.isEval = isEval
        if not self.isEval:
            self.labels = df['target'].values

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_id)

    def __getitem__(self, index):
        """Load one image (and its label in training mode).

        Raises:
            FileNotFoundError: If the image file is missing or cannot be decoded.
        """
        if torch.is_tensor(index):
            index = index.tolist()

        subdir = 'test' if self.isEval else 'train'
        image_name = DATA_DIR + f'{subdir}/{self.image_id[index]}'

        image = cv2.imread(image_name, cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise FileNotFoundError(
                f'Could not read image (missing or unreadable): {image_name}'
            )
        # OpenCV decodes to BGR; convert to RGB channel order.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            image = self.transform(image=image)['image']
        else:
            image = image.astype(np.float32)

        if self.isEval:
            return image

        target = self.labels[index].astype(np.int64)
        return image, target

def get_datasets(labels_train, labels_test):
    """Build the labelled training and validation datasets.

    Args:
        labels_train: DataFrame of training rows.
        labels_test: DataFrame of validation rows.

    Returns:
        Dict with a ``'train'`` dataset (training augmentations) and a
        ``'valid'`` dataset (validation transform); both read labelled images.
    """
    return {
        'train': DanceDataset(
            labels_train, isEval=False, transform=get_train_transform()
        ),
        'valid': DanceDataset(
            labels_test, isEval=False, transform=get_valid_transform()
        ),
    }


## Augmentations

In [None]:
# Side length (pixels) of the square images produced by the transforms below.
IMG_SIZE = 300
# Per-channel statistics passed to A.Normalize in both transforms.
# NOTE(review): these look like the usual ImageNet mean/std, presumably chosen to
# match the pretrained backbones — confirm.
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)

def get_train_transform(p=1.0):
    """Build the training augmentation pipeline.

    Steps, in order: resize to IMG_SIZE, centre-crop to half size with
    probability 0.25, resize back to IMG_SIZE, random horizontal flip, random
    shift/scale/rotate, random hue/saturation/value shift, normalisation with
    ``mean``/``std`` and conversion to a tensor.

    Args:
        p: Probability passed to ``A.Compose`` for the pipeline as a whole.
    """
    half_size = int(IMG_SIZE/2)
    # interpolation=2 is OpenCV's bicubic flag (cv2.INTER_CUBIC).
    resize_options = dict(interpolation=2, always_apply=True, p=1)

    steps = [
        A.Resize(IMG_SIZE, IMG_SIZE, **resize_options),
        A.CenterCrop(half_size, half_size, p=0.25),
        # Bring cropped images back to the common size.
        A.Resize(IMG_SIZE, IMG_SIZE, **resize_options),
        A.HorizontalFlip(p=0.5),
        A.ShiftScaleRotate(
            interpolation=2,
            shift_limit=0.2,
            scale_limit=0.2,
            rotate_limit=15,
            p=0.3,
        ),
        A.HueSaturationValue(
            hue_shift_limit=20,
            sat_shift_limit=30,
            val_shift_limit=20,
            p=0.3,
        ),
        A.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(steps, p=p)

def get_valid_transform():
    """Build the deterministic validation/test pipeline.

    Resizes to IMG_SIZE, normalises with ``mean``/``std`` and converts the
    image to a tensor; no random augmentation is applied.
    """
    steps = [
        A.Resize(IMG_SIZE, IMG_SIZE, interpolation=2, always_apply=True, p=1),
        A.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(steps)


## Models

In [None]:

class ResNet18(nn.Module):
    """torchvision ResNet-18 with its final fully connected layer replaced.

    Args:
        num_classes: Output size of the new final layer. Defaults to 8, the
            value that was previously hard-coded.
        pretrained: Forwarded to ``torchvision.models.resnet18``. Pass False
            to skip loading pretrained weights, e.g. when a checkpoint is
            loaded right afterwards.
    """

    def __init__(self, num_classes=8, pretrained=True):
        super().__init__()
        self.model = torchvision.models.resnet18(pretrained=pretrained)
        in_features = self.model.fc.in_features
        # Swap the original head for one sized to this task.
        self.model.fc = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Return the wrapped network's output for the batch ``x``."""
        return self.model(x)

class Model(nn.Module):
    """timm backbone with its ``classifier`` layer replaced.

    Args:
        name: Model name passed to ``timm.create_model``. The created model
            must expose a ``classifier`` attribute with ``in_features``.
        num_classes: Output size of the new classifier. Defaults to 8, the
            value that was previously hard-coded.
        pretrained: Forwarded to ``timm.create_model``. Pass False to skip
            loading pretrained weights, e.g. when a checkpoint is loaded
            right afterwards.
    """

    def __init__(self, name='tf_efficientnet_b0', num_classes=8, pretrained=True):
        super().__init__()
        self.model = timm.create_model(name, pretrained=pretrained)
        in_features = self.model.classifier.in_features
        # Swap the original head for one sized to this task.
        self.model.classifier = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Return the wrapped network's output for the batch ``x``."""
        return self.model(x)


## Loss and Score

In [None]:
def criterion(y_pred, y_true):
    """Return the mean cross-entropy loss between logits and class indices.

    Args:
        y_pred: Tensor of raw class scores (logits).
        y_true: Tensor of integer class indices.
    """
    # Functional form of nn.CrossEntropyLoss() with its default settings.
    return F.cross_entropy(y_pred, y_true)

def get_f1(y_pred, y_true):
    """Compute the weighted F1 score of predictions against true labels.

    Args:
        y_pred: Tensor of predicted class ids.
        y_true: Tensor of true class ids.

    Returns:
        The value returned by sklearn's ``f1_score`` with ``average='weighted'``.
    """
    # sklearn works on NumPy arrays, so move both tensors to host memory first.
    true_labels = y_true.cpu().numpy()
    predicted_labels = y_pred.cpu().numpy()
    return f1_score(true_labels, predicted_labels, average='weighted')


## Train script

In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def train_one_epoch(loader, model, optimizer, scheduler=None, log=False):
    """Run one training pass over ``loader`` and return the mean batch loss.

    Args:
        loader: Iterable yielding ``(images, targets)`` batches.
        model: Network to train; switched to train mode here.
        optimizer: Optimizer stepped once per batch.
        scheduler: Optional scheduler stepped once per batch (after the optimizer).
        log: When True, print the batch loss every ``FLAGS['log_interval']`` batches.

    Returns:
        Sum of the batch losses divided by the number of batches.
    """
    model.train()
    loss_sum = 0.0
    for step, (images, targets) in enumerate(loader, start=1):
        images, targets = images.to(device), targets.to(device)

        optimizer.zero_grad()
        loss = criterion(model(images.float()), targets)
        loss_sum += float(loss.item())

        loss.backward()
        optimizer.step()

        if scheduler is not None:
            scheduler.step()

        if step % FLAGS['log_interval'] == 0 and log==True:
            print("Loss/train", float(loss))

    return loss_sum/len(loader)

def val_one_epoch(loader, model):
    """Evaluate ``model`` on ``loader`` and return ``(mean loss, weighted F1)``.

    The F1 score is computed once over all predictions of the epoch. Averaging
    per-batch F1 scores (as done previously) is not the F1 of the validation
    set and is very noisy for small batches. The loss is averaged per sample,
    so a shorter final batch is not over-weighted.

    Args:
        loader: Iterable yielding ``(images, targets)`` batches.
        model: Network to evaluate; switched to eval mode here.

    Returns:
        Tuple ``(val_loss, val_f1)`` of Python floats.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    sample_count = 0
    all_preds = []
    all_targets = []
    with torch.no_grad():
        for images, targets in loader:
            images = images.to(device)
            targets = targets.to(device)
            y_pred = model(images.float())

            batch_len = targets.size(0)
            # criterion returns the batch mean; re-weight it by the batch size.
            loss_sum += float(criterion(y_pred, targets)) * batch_len
            sample_count += batch_len

            all_preds.append(y_pred.argmax(1).cpu())
            all_targets.append(targets.cpu())

    if sample_count == 0:
        raise ValueError('val_one_epoch received an empty loader')

    val_f1 = float(get_f1(torch.cat(all_preds), torch.cat(all_targets)))
    return loss_sum/sample_count, val_f1


In [None]:

def fit(labels_train, labels_test, log=False):
    """Train one model on a train/validation split and return its best state.

    Hyper-parameters are read from the module-level ``FLAGS`` dict
    (``model_name``, ``learning_rate``, ``weight_decay``, ``batch_size``,
    ``num_workers``, ``num_epochs``, ``MODEL_PATH``). After every epoch the
    model is validated; the weights with the highest F1 so far are saved to
    ``{MODEL_PATH}{model_name}.pth`` and reloaded before returning.

    Args:
        labels_train: DataFrame of training rows.
        labels_test: DataFrame of validation rows.
        log: Forwarded to ``train_one_epoch`` to enable periodic loss printing.

    Returns:
        Tuple ``(model, best_score)``: the model carrying the best checkpoint's
        weights and the corresponding validation F1.
    """
    best_score = 0.0
    # Tracks whether THIS call wrote the checkpoint, so that a stale file from
    # an earlier run/fold is never loaded by mistake.
    checkpoint_saved = False
    ckpt_path = f"{FLAGS['MODEL_PATH']}{FLAGS['model_name']}.pth"

    #model = ResNet18().to(device)
    model = Model(FLAGS['model_name']).to(device)

    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=FLAGS['learning_rate'],
        weight_decay=FLAGS['weight_decay']
    )

    # 'max' mode: the monitored quantity is the validation F1 (higher is better).
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, 'max', factor=0.5, verbose=True, patience=2
    )

    datasets = get_datasets(labels_train, labels_test)

    #loaders
    train_loader = DataLoader(
        datasets['train'], batch_size=FLAGS['batch_size'],
        num_workers=FLAGS['num_workers'],
        shuffle=True, pin_memory=True,
    )
    val_loader = DataLoader(
        datasets['valid'], batch_size=FLAGS['batch_size'], shuffle=False,
        num_workers=FLAGS['num_workers'], drop_last=False
    )

    #train loop
    for epoch in range(0, FLAGS['num_epochs']):

        print('-'*27 + f'Epoch #{epoch+1} started' + '-'*27)

        # The plateau scheduler is stepped once per epoch below, so no
        # per-batch scheduler is handed to the training pass.
        train_loss = train_one_epoch(
            train_loader,
            model, optimizer,
            scheduler=None,
            log=log
        )

        print(f'Average train loss for epoch #{epoch+1} : {train_loss}')
        val_loss, val_f1 = val_one_epoch(val_loader, model)
        scheduler.step(val_f1)
        print(f'Average val loss for epoch #{epoch+1} : {val_loss}')
        print(f'Average f1 score for epoch #{epoch+1} : {val_f1}')

        # Always save at least once, even if the score never rises above 0.
        if val_f1 > best_score or not checkpoint_saved:
            best_score = max(best_score, val_f1)
            torch.save({
                'model_state_dict': model.state_dict(),
            }, ckpt_path)
            checkpoint_saved = True

        print('-'*28 + f'Epoch #{epoch+1} ended' + '-'*28)

    if checkpoint_saved:
        best_ckpt = torch.load(ckpt_path, map_location=device)
        model.load_state_dict(best_ckpt['model_state_dict'])

    return model, best_score


## Cross-val train

In [None]:
# Run configuration read by the training, cross-validation and inference code.
FLAGS = {
    'batch_size': 4,
    'num_workers': 8,
    'learning_rate': 1e-4,
    'num_epochs': 25,
    'weight_decay': 1e-2,
    'log_interval': 25,       # batches between loss prints when logging is on
    'img_size': IMG_SIZE,
    'MODEL_PATH': './',       # prefix for checkpoint file names
    'model_name': 'tf_efficientnet_b2',
    'folds': 5,
}

def cross_val(folds=FLAGS['folds']):
    """Train one model per stratified fold of ``df_train`` and report scores.

    For every fold the model returned by ``fit`` is saved to
    ``{MODEL_PATH}{model_name}_fold_{k}.pth`` (k starting at 1). The per-fold
    best scores and their mean are printed at the end.

    Args:
        folds: Number of stratified folds. The default is read from
            ``FLAGS['folds']`` once, when this function is defined.
    """
    total_score = []
    skf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=seed)

    for fold, (train_index, val_index) in enumerate(skf.split(df_train, df_train['target'])):
        print('='*28 + f'Fold #{fold+1} started' + '='*28)
        # skf.split yields positional row indices, so select by position (.iloc);
        # label-based .loc is only equivalent for a default RangeIndex.
        trained_model, best_score = fit(df_train.iloc[train_index], df_train.iloc[val_index])
        print(f'fold #{fold+1} best score: ', best_score)
        total_score.append(best_score)
        torch.save({
            'model_state_dict': trained_model.state_dict()
            }, f"{FLAGS['MODEL_PATH']}{FLAGS['model_name']}_fold_{fold+1}.pth")
        print('='*28 + f'Fold #{fold+1} ended' + '='*28)

    print(f'scores for all folds: {total_score}')
    print(f'avg score over {folds} folds: {np.array(total_score).mean()}')

# Run the full cross-validation training (writes one checkpoint file per fold).
cross_val()

# Results noted by the author for each backbone.
# NOTE(review): CV is presumably the average fold score printed by cross_val
# (scaled by 100) and LB the competition leaderboard score; the second number on
# the EnetB2 line carries no label and is presumably its LB score — confirm.
# ResNet18: CV 84.5277 LB 88.06930
# EnetB0 CV 85.0458 LB 88.37636
# EnetB2 CV 88.0906432 82.38706

---------------------------Epoch #1 started---------------------------
Average train loss for epoch #1 : 1.940336188224897
Average val loss for epoch #1 : 1.614125785074736
Average f1 score for epoch #1 : 0.5903508771929825
----------------------------Epoch #1 ended----------------------------
---------------------------Epoch #2 started---------------------------
Average train loss for epoch #2 : 1.4404517673466304
Average val loss for epoch #2 : 1.0718027259174145
Average f1 score for epoch #2 : 0.6592105263157895
----------------------------Epoch #2 ended----------------------------
---------------------------Epoch #3 started---------------------------
Average train loss for epoch #3 : 1.0280645097771737
Average val loss for epoch #3 : 0.7815365869747964
Average f1 score for epoch #3 : 0.7684210526315789
----------------------------Epoch #3 ended----------------------------
---------------------------Epoch #4 started---------------------------
Average train loss for epoch #4 : 0.7466

## Inference

In [None]:
# Test images go through the deterministic validation transform; the dataset is
# built in evaluation mode, so it yields images only.
test_dataset = DanceDataset(
    df_test,
    isEval=True,
    transform=get_valid_transform(),
)
# Keep the file order (no shuffling, no dropped batch) so that predictions line
# up with the rows of df_test.
test_dataloader = DataLoader(
    test_dataset,
    batch_size=FLAGS['batch_size'],
    num_workers=FLAGS['num_workers'],
    shuffle=False,
    drop_last=False,
)


# Rebuild one model per cross-validation fold from the checkpoints written by
# cross_val(); together they form the inference ensemble.
model_list = []
for i in range(0, FLAGS['folds']):
    model = Model(FLAGS['model_name']).to(device)
    #model = ResNet18().to(device)
    # map_location lets checkpoints saved on a GPU be loaded on whichever device
    # this session uses (e.g. a CPU-only runtime).
    ckpt = torch.load(
        f"{FLAGS['MODEL_PATH']}{FLAGS['model_name']}_fold_{i+1}.pth",
        map_location=device,
    )
    model.load_state_dict(ckpt['model_state_dict'])
    model.eval()
    model_list.append(model)

def test(loader, model_list):
    """Predict class names for every image in ``loader`` with a model ensemble.

    For each batch the raw outputs of all models are summed and the arg-max
    class is taken; class ids are mapped back to names with the fitted
    label encoder ``le``.

    Args:
        loader: Iterable yielding image batches (no targets).
        model_list: Models whose outputs are summed; they are expected to be
            in eval mode already.

    Returns:
        Array of predicted class names, in loader order.
    """
    num_classes = le.classes_.shape[0]
    predictions = []
    with torch.no_grad():
        for images in tqdm(loader, total=len(loader)):
            images = images.to(device)
            # Size the accumulator from the actual batch: the last batch can be
            # shorter than FLAGS['batch_size'], which would otherwise raise a
            # shape error (or silently broadcast when it holds a single image).
            y_pred = torch.zeros((images.shape[0], num_classes), dtype=torch.float)
            for model in model_list:
                y_pred += model(images).float().cpu()
            predictions.append(y_pred.argmax(1).numpy())

    if not predictions:
        # np.hstack rejects an empty list; return an empty result instead.
        return le.inverse_transform(np.array([], dtype=np.int64))
    return le.inverse_transform(np.hstack(predictions))


In [None]:
# Run the fold ensemble over the test set, store the predicted class names in
# df_test and write the table to submission.csv (without the index column).
df_test['target'] = test(test_dataloader, model_list)
df_test.to_csv('submission.csv', index=False)

HBox(children=(FloatProgress(value=0.0, max=39.0), HTML(value='')))




In [None]:
# Sanity check: number of test images assigned to each predicted class.
df_test['target'].value_counts()

odissi           27
kathak           27
sattriya         21
kathakali        19
kuchipudi        19
mohiniyattam     18
bharatanatyam    17
manipuri          8
Name: target, dtype: int64