In [1]:
# general packages
import warnings
import json
import os
from PIL import Image
from glob import glob
from zipfile import ZipFile
import pandas as pd
import numpy as np

#sklearns 
from sklearn.metrics import accuracy_score
from sklearn.utils import class_weight
from sklearn.model_selection import train_test_split 
import random
import cv2
import gc
import math
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

from keras.optimizers import Adam, Nadam, SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model, load_model, Sequential
from keras.layers import Dense, GlobalAveragePooling2D, Dropout, Conv2D, GlobalMaxPooling2D
from keras.layers import (MaxPooling2D, Input, Average, Activation, MaxPool2D,
                          Flatten, LeakyReLU, BatchNormalization, concatenate)
from keras import models
from keras import layers
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.callbacks import (ModelCheckpoint, LearningRateScheduler,
                             EarlyStopping, ReduceLROnPlateau, TensorBoard, CSVLogger)
from sklearn.metrics import accuracy_score, recall_score

from keras.utils import Sequence
from keras import utils as np_utils
from keras.callbacks import (Callback, ModelCheckpoint,
                                        LearningRateScheduler,EarlyStopping, 
                                        ReduceLROnPlateau,CSVLogger)

import albumentations
from PIL import Image, ImageOps, ImageEnhance
from albumentations.core.transforms_interface import ImageOnlyTransform
from albumentations.augmentations import functional as F
from albumentations import (
    HorizontalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine,
    IAASharpen, IAAEmboss, RandomBrightnessContrast, Flip, OneOf, Compose,DualTransform, IAAAffine,IAAPerspective
)
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
import efficientnet.keras as efn 


warnings.simplefilter('ignore')
sns.set_style('whitegrid')

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Notebook-wide configuration. Several of these names are read as globals by
# the generator classes and the training cell further down.
SEED = 2020  # passed to seed_all() below
batch_size = 64*8  # training batch size handed to GraphemeGenerator in the training cell
FACTOR = 1.0  # scale factor applied to HEIGHT/WIDTH to get the working image size
stats = (0.0692, 0.2051)  # pixel normalisation: (x / 255 - stats[0]) / stats[1]

HEIGHT = 137 
WIDTH = 236
gpus = 4  # forwarded to create_model(); also scales the validation batch size (12*gpus)
dim = (int(HEIGHT * FACTOR), int(WIDTH * FACTOR))  # (height, width) after scaling
resize_wid = int(WIDTH * FACTOR)  # target width given to cv2.resize
resize_hit = int(HEIGHT * FACTOR)  # target height given to cv2.resize

def seed_all(seed):
    """Seed the random number sources this notebook relies on.

    Sets ``PYTHONHASHSEED`` in the process environment and seeds both the
    standard-library ``random`` module and NumPy's global generator.
    TensorFlow's own generator is not seeded here.

    Args:
        seed: integer seed shared by every source.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed):
        seeder(seed)

# Seed the RNGs before anything below shuffles or augments data.
seed_all(SEED)

# load files
im_path = '../137x236/'  # image directory; also read as a global by GraphemeGenerator
train = pd.read_csv('../input/bengaliai-cv19/train.csv')
test = pd.read_csv('../input/bengaliai-cv19/test.csv')
# Path of each training image, built from its image_id.
train['filename'] = train.image_id.apply(lambda filename: im_path + filename + '.png')

# top 5 samples
train.head()

Unnamed: 0,image_id,grapheme_root,vowel_diacritic,consonant_diacritic,grapheme,filename
0,Train_0,15,9,5,ক্ট্রো,../137x236/Train_0.png
1,Train_1,159,0,0,হ,../137x236/Train_1.png
2,Train_2,22,3,5,খ্রী,../137x236/Train_2.png
3,Train_3,53,2,2,র্টি,../137x236/Train_3.png
4,Train_4,71,9,5,থ্রো,../137x236/Train_4.png


## Iterative Stratification

From [Abhishek Thakur](https://www.youtube.com/watch?v=8J5Q4mEzRtY)

In [3]:
# Start every row at the sentinel fold -1; the loop below writes the real fold ids.
train.loc[:, 'fold'] = -1
X = train.image_id.values
# The three target columns are passed together as the stratification labels.
y = train[['grapheme_root', 'vowel_diacritic', 'consonant_diacritic']].values


# NOTE(review): random_state is given without shuffle=True -- confirm that the
# installed iterstrat / scikit-learn versions accept this combination.
mskf = MultilabelStratifiedKFold(n_splits=5, random_state=1)
# A row's fold id is the split in which it falls into the validation part.
# NOTE(review): `vl` is used with .loc, so this presumably relies on `train`
# having its default 0..n-1 index -- confirm.
for fold, (tr, vl) in enumerate(mskf.split(X,y)):
    train.loc[vl, 'fold'] = fold
    
print(train.fold.value_counts())
# The file name 'fold_trian' (sic, no extension) is the one the next cell reads back.
train.to_csv('fold_trian', index=False)

4    40168
3    40168
2    40168
1    40168
0    40168
Name: fold, dtype: int64


In [4]:
# Reload the fold assignments written by the previous cell.
fold_train = pd.read_csv('fold_trian')
fold_train = fold_train.sample(frac=1).reset_index(drop=True) # shuffle all rows, then renumber the index
fold_train.head()

Unnamed: 0,image_id,grapheme_root,vowel_diacritic,consonant_diacritic,grapheme,filename,fold
0,Train_145648,81,6,0,নৃ,../137x236/Train_145648.png,3
1,Train_24895,125,7,0,ল্কে,../137x236/Train_24895.png,0
2,Train_75994,59,7,0,ণে,../137x236/Train_75994.png,4
3,Train_442,43,0,4,জ্য,../137x236/Train_442.png,1
4,Train_177898,13,9,2,র্কো,../137x236/Train_177898.png,3


In [5]:
from albumentations.augmentations import functional as Func

class GridMask(DualTransform):
    """GridMask augmentation for image classification and object detection.

    Author: Qishen Ha
    Email: haqishen@gmail.com
    2020/01/29

    Args:
        num_grid (int or (int, int)): number of grid cells in a row or column.
            A single int n is expanded to (n, n); a pair (lo, hi) builds one
            mask for every grid count from lo to hi inclusive.
        fill_value (int, float, list of int, list of float): value written into
            the dropped cells of the mask. The image is multiplied by the mask,
            and the mask is stored as uint8.
        rotate ((int, int) or int): range from which a random angle is picked. If rotate is a single int
            an angle is picked from (-rotate, rotate). Default: 0, which disables
            rotation (rotation only happens when the upper bound is > 0).
        mode (int):
            0 - cropout a quarter of the square of each grid (left top)
            1 - reserve a quarter of the square of each grid (left top)
            2 - cropout 2 quarter of the square of each grid (left top & right bottom)

    Targets:
        image, mask

    Image types:
        uint8, float32

    Reference:
    |  https://arxiv.org/abs/2001.04086
    |  https://github.com/akuxcw/GridMask
    """

    def __init__(self, num_grid=3, fill_value=0, rotate=0, mode=0, always_apply=False, p=0.5):
        """Store the configuration; masks are built lazily on the first image seen."""
        super(GridMask, self).__init__(always_apply, p)
        # Normalise scalar arguments to (low, high) pairs.
        if isinstance(num_grid, int):
            num_grid = (num_grid, num_grid)
        if isinstance(rotate, int):
            rotate = (-rotate, rotate)
        self.num_grid = num_grid
        self.fill_value = fill_value
        self.rotate = rotate
        self.mode = mode
        # Filled by init_masks(): one mask per grid count, plus the matching
        # upper bounds for the random crop offsets.
        self.masks = None
        self.rand_h_max = []
        self.rand_w_max = []

    def init_masks(self, height, width):
        """Build the grid masks once, sized from the given image height/width.

        Subsequent calls are no-ops because `self.masks` is already set, so the
        masks keep the size of the first image processed.
        """
        if self.masks is None:
            self.masks = []
            # NOTE: n_masks is computed but never used.
            n_masks = self.num_grid[1] - self.num_grid[0] + 1
            for n, n_g in enumerate(range(self.num_grid[0], self.num_grid[1] + 1, 1)):
                grid_h = height / n_g
                grid_w = width / n_g
                # The mask spans n_g + 1 cells per axis, i.e. one cell more than
                # the image, so apply() can crop an image-sized window at a
                # random offset of less than one cell.
                this_mask = np.ones((int((n_g + 1) * grid_h), int((n_g + 1) * grid_w))).astype(np.uint8)
                for i in range(n_g + 1):
                    for j in range(n_g + 1):
                        # Top-left quarter of cell (i, j).
                        this_mask[
                             int(i * grid_h) : int(i * grid_h + grid_h / 2),
                             int(j * grid_w) : int(j * grid_w + grid_w / 2)
                        ] = self.fill_value
                        if self.mode == 2:
                            # Bottom-right quarter of cell (i, j) as well.
                            this_mask[
                                 int(i * grid_h + grid_h / 2) : int(i * grid_h + grid_h),
                                 int(j * grid_w + grid_w / 2) : int(j * grid_w + grid_w)
                            ] = self.fill_value

                if self.mode == 1:
                    # Invert the mask so the quarters are kept instead of dropped.
                    this_mask = 1 - this_mask

                self.masks.append(this_mask)
                self.rand_h_max.append(grid_h)
                self.rand_w_max.append(grid_w)

    def apply(self, image, mask, rand_h, rand_w, angle, **params):
        """Multiply `image` in place by an image-sized window of `mask`.

        The mask is rotated by `angle` first when rotation is enabled, and gets
        a trailing channel axis when the image is 3-dimensional. The window
        starts at (rand_h, rand_w). Returns the modified image.
        """
        h, w = image.shape[:2]
        mask = Func.rotate(mask, angle) if self.rotate[1] > 0 else mask
        mask = mask[:,:,np.newaxis] if image.ndim == 3 else mask
        image *= mask[rand_h:rand_h+h, rand_w:rand_w+w].astype(image.dtype)
        return image

    def get_params_dependent_on_targets(self, params):
        """Pick a random mask, crop offset and rotation angle for one call.

        Reads the image from `params['image']` to size the masks on first use.
        Returns a dict with keys 'mask', 'rand_h', 'rand_w' and 'angle', which
        match the extra arguments of apply().
        """
        img = params['image']
        height, width = img.shape[:2]
        self.init_masks(height, width)

        mid = np.random.randint(len(self.masks))
        mask = self.masks[mid]
        # Offsets are bounded by the size of one grid cell of the chosen mask.
        rand_h = np.random.randint(self.rand_h_max[mid])
        rand_w = np.random.randint(self.rand_w_max[mid])
        angle = np.random.randint(self.rotate[0], self.rotate[1]) if self.rotate[1] > 0 else 0

        return {'mask': mask, 'rand_h': rand_h, 'rand_w': rand_w, 'angle': angle}

    @property
    def targets_as_params(self):
        """Targets whose values are handed to get_params_dependent_on_targets()."""
        return ['image']

    def get_transform_init_args_names(self):
        """Names of the constructor arguments that describe this transform."""
        return ('num_grid', 'fill_value', 'rotate', 'mode')

# Grapheme Generator

In [6]:
class GraphemeGenerator(Sequence):
    """Batch generator of normalised single-channel images and one-hot targets.

    Each batch is ``(Data, [Target_1, Target_2, Target_3])`` where ``Data`` has
    shape ``(batch_size, *dim, 1)`` and the targets are one-hot matrices with
    168, 11 and 7 columns for ``grapheme_root``, ``vowel_diacritic`` and
    ``consonant_diacritic`` respectively.

    The class reads two notebook-level globals: ``im_path`` (image directory)
    and ``stats`` (normalisation constants).

    Args:
        data: DataFrame with columns ``image_id``, ``grapheme_root``,
            ``vowel_diacritic``, ``consonant_diacritic`` and ``fold``.
        batch_size: number of samples per batch. A trailing partial batch is
            dropped (see ``__len__``).
        dim: ``(height, width)`` every image is resized to.
        kfold: fold ids to keep. A generator built with exactly one fold is
            treated as a validation generator and never applies ``transform``.
        shuffle: reshuffle the sample order at the end of every epoch.
        transform: optional callable used as ``transform(image=img)['image']``.
    """

    # Weights used to collapse the three image channels into one.
    _GRAY_WEIGHTS = (0.299, 0.587, 0.114)

    def __init__(self, data, batch_size, dim, kfold = (1,), shuffle=False, transform = None):
        data = data[["image_id", "grapheme_root", "vowel_diacritic",
                     "consonant_diacritic", "fold"]]
        data = data[data.fold.isin(kfold)].reset_index(drop=True)
        self._data = data

        # Fixed-width one-hot tables: the column count must not depend on
        # which classes happen to be present in the selected folds.
        self._label_1 = self._one_hot(data['grapheme_root'], 168)
        self._label_2 = self._one_hot(data['vowel_diacritic'], 11)
        self._label_3 = self._one_hot(data['consonant_diacritic'], 7)
        self._list_idx = data.index.values
        self._batch_size = batch_size
        self._dim = dim
        self._shuffle = shuffle
        self._transform = transform
        self._kfold = kfold
        self.on_epoch_end()

    @staticmethod
    def _one_hot(labels, num_classes):
        """One-hot encode integer `labels` into exactly `num_classes` columns."""
        fixed = labels.astype(pd.CategoricalDtype(categories=list(range(num_classes))))
        return pd.get_dummies(fixed)

    def _label_tables(self):
        """The three one-hot tables, in the order the targets are returned."""
        return (self._label_1, self._label_2, self._label_3)

    def __len__(self):
        """Number of full batches per epoch."""
        return len(self._data) // self._batch_size

    def get_all_valid_y(self):
        """Return the one-hot targets of every sample the batches cover.

        Rows follow the current sample order and are limited to
        ``len(self) * batch_size`` samples, i.e. exactly the samples that are
        yielded when iterating over all batches.

        Returns:
            list of three float arrays with 168, 11 and 7 columns.
        """
        total = len(self) * self._batch_size
        rows = self._indices[:total]
        return [np.asarray(table.values[rows], dtype=float)
                for table in self._label_tables()]

    def __getitem__(self, index):
        """Load, preprocess and return batch number `index`.

        Raises:
            FileNotFoundError: if an image file cannot be read.
        """
        batch_idx = self._indices[index*self._batch_size:(index+1)*self._batch_size]
        rows = self._list_idx[batch_idx]

        Data = np.empty((self._batch_size, *self._dim, 1))
        targets = [np.empty((self._batch_size, table.shape[1]), dtype=float)
                   for table in self._label_tables()]
        for target, table in zip(targets, self._label_tables()):
            target[:len(rows)] = table.values[rows]

        # Augment only when a transform is set and more than one fold is in
        # use, which is how this notebook distinguishes training from validation.
        augment = self._transform is not None and len(self._kfold) != 1
        height, width = self._dim
        image_ids = self._data['image_id'].values

        for i, k in enumerate(rows):
            path = im_path + image_ids[k] + '.png'
            image = cv2.imread(path)
            if image is None:
                # cv2.imread signals failure by returning None instead of raising.
                raise FileNotFoundError('could not read image: ' + path)
            image = cv2.resize(image, (width, height))  # cv2 expects (width, height)
            if augment:
                image = self._transform(image=image)['image']

            image = np.dot(image[..., :3], self._GRAY_WEIGHTS)
            image = (image.astype(np.float32)/255.0 - stats[0])/stats[1]
            Data[i, :, :, 0] = image

        return Data, targets

    def on_epoch_end(self):
        """Reset the sample order, shuffling it when `shuffle` is enabled."""
        self._indices = np.arange(len(self._list_idx))
        if self._shuffle:
            np.random.shuffle(self._indices)

In [7]:
class CutMixGenerator(Sequence):
    """Wrap a batch generator and apply CutMix and MixUp to part of each batch.

    For every batch of ``generator1``, a fraction ``cut_p`` of the rows is
    CutMix-ed and a fraction ``mixup_p`` is MixUp-ed with single samples drawn
    from ``generator2``. The two row selections are drawn independently, so a
    row can receive both. Labels of modified rows are label-smoothed and
    blended with the same ratio as the pixels.

    Args:
        generator1: generator providing the batches. Must expose
            ``_batch_size`` and return ``(Data, [T1, T2, T3])``.
        generator2: generator providing the mixing partners; only the first
            sample of each of its batches is used.
        cut_p: fraction of rows per batch that get CutMix.
        maxcut: upper bound for the relative width/height of the pasted patch.
        mixup_p: fraction of rows per batch that get MixUp.
        maxmix: upper bound for the MixUp blending ratio.
    """

    def __init__(self, generator1, generator2, cut_p = 0.2, maxcut= 0.5, mixup_p = 0.2, maxmix = 0.5):
        self.generator1 = generator1
        self.generator2 = generator2
        self.cut_p = cut_p
        self.mixup_p = mixup_p
        self.batch_size = self.generator1._batch_size
        self.maxcut = maxcut
        self.maxmix = maxmix
        self.on_epoch_end()

    def __len__(self):
        """Number of batches, identical to that of `generator1`."""
        return len(self.generator1)

    def get_rand_bbox(self, width, height, l):
        """Draw a random box inside a `width` x `height` image.

        The box width and height are random fractions (at most `l`) of the
        image width and height.

        Returns:
            (x, y, r_w, r_h): top-left corner and size of the box, as ints.
        """
        wcut = np.random.random()*l
        hcut = np.random.random()*l
        # Built-in int: the np.int alias no longer exists in current NumPy.
        r_w = int(width * wcut)
        r_h = int(height * hcut)
        x = np.random.randint(width - r_w)
        y = np.random.randint(height - r_h)
        return x, y, r_w, r_h

    def smooth_labels(self, labels, factor=0.1):
        """Label-smooth `labels` IN PLACE and return the same array.

        Every entry is scaled by ``1 - factor`` and ``factor / labels.shape[0]``
        is added, so a 1-D one-hot vector keeps summing to 1.
        """
        labels *= (1 - factor)
        labels += (factor / labels.shape[0])
        return labels

    def _blend_labels(self, y1, y2, ra):
        """Smooth both label lists and blend them as (1 - ra) * y1 + ra * y2."""
        return [self.smooth_labels(a)*(1.0-ra) + self.smooth_labels(b)*ra
                for a, b in zip(y1, y2)]

    def cutmix(self, X1, X2, y1, y2):
        """Paste a random box of `X2` into `X1` (in place) and blend the labels.

        The label blending ratio is the area of the box relative to the image.

        Returns:
            (X1, labels): the modified image and the list of blended labels.
        """
        width = X1.shape[1]
        height = X1.shape[0]
        left, top, r_w, r_h = self.get_rand_bbox(width, height, self.maxcut)
        X1[top:top+r_h, left:left+r_w, :] = X2[top:top+r_h, left:left+r_w, :]
        ra = (r_w*r_h) / (width*height)
        return X1, self._blend_labels(y1, y2, ra)

    def mixup(self, X1, X2, y1, y2):
        """Blend `X1` and `X2` with a random ratio in [0, maxmix) and blend the labels.

        Returns:
            (X, labels): a new blended image and the list of blended labels.
        """
        ra = np.random.random()*self.maxmix
        X = X1*(1-ra) + X2*ra
        return X, self._blend_labels(y1, y2, ra)

    def _mix_rows(self, Data, targets, mix_fn, fraction):
        """Apply `mix_fn` to a random `fraction` of the batch rows, in place."""
        chosen = np.random.choice(np.arange(self.batch_size),
                                  int(self.batch_size*fraction), replace=False)
        for idx in chosen:
            srcidx = np.random.randint(len(self.generator2))
            srcD, srcT = self.generator2[srcidx]
            mixed_image, mixed_labels = mix_fn(
                Data[idx, :], srcD[0],
                [target[idx, :] for target in targets],
                [source[0] for source in srcT],
            )
            Data[idx, :] = mixed_image
            for target, label in zip(targets, mixed_labels):
                target[idx, :] = label

    def __getitem__(self, index):
        """Return batch `index` of `generator1` after CutMix and then MixUp."""
        Data, targets = self.generator1[index]
        targets = list(targets)
        self._mix_rows(Data, targets, self.cutmix, self.cut_p)
        self._mix_rows(Data, targets, self.mixup, self.mixup_p)
        return Data, targets

    def on_epoch_end(self):
        """Forward the end-of-epoch hook to both wrapped generators."""
        self.generator1.on_epoch_end()
        self.generator2.on_epoch_end()

# Data Augmentation

For the augmentation part, I'm simply following the suggestion made by [XingJian Lyu](https://www.kaggle.com/roguekk007) in the discussion [Useful Baseline Data Augmentations?](https://www.kaggle.com/c/bengaliai-cv19/discussion/132642), specifically in [this comment](https://www.kaggle.com/c/bengaliai-cv19/discussion/132642#759415). 

            OneOf([
            IAAAdditiveGaussianNoise(),
            GaussNoise(),
            ], p=0.1),
    
            OneOf([
                MotionBlur(p=0.2),
                MedianBlur(blur_limit=3, p=0.1),
                Blur(blur_limit=3, p=0.1),
            ], p=0.1),
    
            OneOf([
                OpticalDistortion(p=0.3),
                GridDistortion(p=0.1),
                IAAPiecewiseAffine(p=0.3),
            ], p=0.1),
            OneOf([
                CLAHE(clip_limit=2),
                IAASharpen(),
                IAAEmboss(),
                RandomBrightnessContrast(),
            ], p=0.1),

In [8]:
# Training-time augmentation pipeline, passed as `transform` to the training
# GraphemeGenerator. It has three stages, each wrapped in a OneOf with its own
# probability: geometric jitter, blur, and GridMask.
train_transform = albumentations.Compose([
                # Stage 1 (p=0.2): small shift / scale / rotation with constant border fill.
                albumentations.OneOf([
                    ShiftScaleRotate(scale_limit=.15, rotate_limit=10, 
                                     border_mode=cv2.BORDER_CONSTANT),
                ],p = 0.2),
    
            # Stage 2 (p=0.1): one of three blur variants.
            OneOf([
            MotionBlur(p=0.2),
            MedianBlur(blur_limit=3, p=0.1),
            Blur(blur_limit=3, p=0.1),
            ], p=0.1),

                # Stage 3 (p=0.2): one GridMask variant, one per masking mode (0, 1, 2).
                albumentations.OneOf([   
                    GridMask(num_grid=(12,20), rotate=(-15,15), mode=0),
                    GridMask(num_grid=(8,20), rotate=(-15,15), mode=1),
                    GridMask(num_grid=(8,20), rotate=(-15,15), mode=2),
            ],p = 0.2)
        ])

## Visualize the samples

# Modeling

In [11]:
def copy_weights_diff_input_ch(src, tar):
    """Copy weights from `src` into `tar` when their input channel counts differ.

    The first weight array of ``src.layers[1]`` is summed over its channel
    axis (axis 2), divided by the number of input channels of
    ``tar.layers[1]`` and replicated once per target channel. That array
    becomes the only weight of ``tar.layers[1]``. Every layer from index 2
    onwards receives the weights of the layer at the same index in `src`.

    Args:
        src: model to read weights from.
        tar: model to write weights to; modified in place.

    Returns:
        `tar`, for convenience.
    """
    source_kernel = src.layers[1].get_weights()[0]
    n_target_channels = tar.layers[1].get_weights()[0].shape[2]

    channel_share = np.sum(source_kernel, axis=2)
    channel_share /= n_target_channels
    adapted_kernel = np.stack([channel_share] * n_target_channels, axis=2)
    tar.layers[1].set_weights([adapted_kernel])

    # Layers 0 and 1 are handled above; the rest are copied one to one.
    for position, source_layer in enumerate(src.layers[2:], start=2):
        tar.layers[position].set_weights(source_layer.get_weights())

    return tar

In [12]:
input_shape = (*dim, 1)  # (height, width, 1): single-channel model input

In [13]:
from keras.utils import multi_gpu_model

In [14]:
def create_model(backbone, input_shape, slr = 0.003, gpus=1):
    """Build and compile the three-headed classifier on top of `backbone`.

    Two backbones are instantiated: one without weights for `input_shape`,
    and one with the 'noisy-student' weights. The pretrained weights are
    transferred with ``copy_weights_diff_input_ch``. A global average pooling
    layer feeds three softmax heads named 'gra' (168 classes), 'vow' (11)
    and 'cons' (7).

    Args:
        backbone: model constructor accepting ``input_shape``, ``include_top``
            and ``weights`` keyword arguments.
        input_shape: input shape of the model to train.
        slr: learning rate handed to Adam.
        gpus: when not 1, the model is wrapped with ``multi_gpu_model``.

    Returns:
        The compiled model.
    """
    head_sizes = (('gra', 168), ('vow', 11), ('cons', 7))
    head_names = [head for head, _ in head_sizes]

    init_model = backbone(input_shape=input_shape, include_top=False, weights=None)
    imagenet_model = backbone(weights='noisy-student', include_top=False)
    base_model = copy_weights_diff_input_ch(imagenet_model, init_model)

    pooled = GlobalAveragePooling2D()(base_model.output)
    output_tensor = [Dense(units, activation='softmax', name=head)(pooled)
                     for head, units in head_sizes]

    model = Model(base_model.input, output_tensor)
    if gpus != 1:
        model = multi_gpu_model(model, gpus=gpus, cpu_merge=False)

    # The grapheme root head carries most of the loss weight.
    model.compile(
        optimizer=Adam(lr=slr),
        loss={head: 'categorical_crossentropy' for head in head_names},
        loss_weights=dict(zip(head_names, (0.6, 0.2, 0.2))),
        metrics={head: 'accuracy' for head in head_names},
    )
    return model


## Competition Eval Metrics

The following code cell is a slightly modified version of someone else's code; unfortunately I have forgotten where I found it, so I can't give proper credit. 

In [15]:
def macro_recall(y_true, y_pred):
    """Return the macro-averaged recall of `y_pred` against `y_true`.

    Thin wrapper around ``recall_score`` with ``average='macro'``.
    """
    score = recall_score(y_true, y_pred, average='macro')
    return score

class CustomCallback(tf.keras.callbacks.Callback):
    """Track the weighted macro recall on a validation generator after each epoch.

    After every epoch the model attached to this callback predicts on the
    validation generator. The macro recall of each of the three heads is
    combined with weights 2 : 1 : 1, and the weights are saved to
    ``'best_avg_recall' + name + '.h5'`` whenever the score equals the best
    value seen so far.

    Args:
        val_data: validation generator; must expose ``_batch_size`` and
            ``get_all_valid_y()`` and yield samples in a fixed order.
        name: suffix used in the weight file name.
    """

    def __init__(self, val_data, name):
        super().__init__()
        self.valid_data = val_data
        self.batch_size = self.valid_data._batch_size
        self.name = name
        self.avg_recall = []  # score history, one entry per epoch
        # Ground-truth class ids per head, computed once because the
        # validation order does not change between epochs.
        yVal = self.valid_data.get_all_valid_y()
        self.val_trues = {head: list(np.argmax(yVal[head], axis=1))
                          for head in range(3)}

    def on_epoch_end(self, epoch, logs=None):
        """Evaluate the weighted macro recall and save weights on a new best."""
        # Use the model and data owned by this callback rather than notebook
        # globals, so the callback keeps working whatever the surrounding
        # cell calls its variables.
        val_preds = self.model.predict_generator(self.valid_data, workers=8)
        self.val_preds = {head: list(np.argmax(val_preds[head], axis=1))
                          for head in range(3)}

        recalls = [macro_recall(self.val_trues[head], self.val_preds[head])
                   for head in range(3)]
        avg_result = np.average(recalls, weights=[2, 1, 1])
        self.avg_recall.append(avg_result)

        # A tie with the previous best also counts and overwrites the file.
        if avg_result == max(self.avg_recall):
            print(len(self.val_trues[0])," sets validation Avg. Recall Improved. Saving model.")
            print(f"Avg. Recall: {round(avg_result, 4)}")
            self.model.save_weights('best_avg_recall' + self.name +'.h5')
        return
    

In [16]:
def Call_Back(name, val_generator):
    """Assemble the callbacks used for training one fold.

    Args:
        name: prefix for the files written: ``name + '.h5'`` (best val_loss
            weights) and ``name + '.csv'`` (per-epoch log).
        val_generator: validation generator handed to ``CustomCallback``.

    Returns:
        list: [ModelCheckpoint, CSVLogger, ReduceLROnPlateau, CustomCallback].
    """
    checkpoint_options = dict(monitor='val_loss', verbose=0, save_best_only=True,
                              mode='min', save_weights_only=True)
    plateau_options = dict(monitor='val_loss', factor=0.3, patience=4,
                           verbose=1, mode='auto', min_delta=0.0001,
                           cooldown=1, min_lr=0.000001)
    return [
        ModelCheckpoint(name+'.h5', **checkpoint_options),
        CSVLogger(name+'.csv'),
        ReduceLROnPlateau(**plateau_options),
        CustomCallback(val_generator, name),
    ]

In [None]:
# 5-fold training: for each fold i, train on the other four folds and
# validate on fold i.
epochs = 50
historys = []
backbone = efn.EfficientNetB0
disc = '_mul_addaug_EF0_Fold_'
for i in range(5):
    t_fold = np.setdiff1d(np.arange(5),i)  # the four training folds
    name = disc + str(i)

    # Augmented training batches.
    train_generator1 = GraphemeGenerator(fold_train, batch_size, dim , 
                                        shuffle = True,  
                                        kfold = t_fold, 
                                        transform = train_transform)

    # Un-augmented single-sample generator: source of CutMix / MixUp partners.
    cut_src_gen = GraphemeGenerator(fold_train, 1, dim , 
                                        shuffle = True,  
                                        kfold = t_fold)

    train_generator = CutMixGenerator(
        generator1=train_generator1,
        generator2=cut_src_gen,
        mixup_p = 0.2,
        cut_p=0.2
    )
    
    val_generator = GraphemeGenerator(fold_train, 12*gpus, dim, kfold = (i,), shuffle = False)
    callbacks = Call_Back(name,val_generator)    
    
    model = create_model(backbone, input_shape, slr=0.003,gpus=gpus)
        
    train_history = model.fit_generator(
        train_generator,
        validation_data=val_generator,
        epochs=epochs,
        workers=32,
        callbacks=callbacks
    )
    historys.append(train_history)

    # Reload the best-recall weights and export them for single-device use.
    model.load_weights('./'+'best_avg_recall' + name +'.h5')
    if gpus != 1:
        # create_model() wrapped the network with multi_gpu_model; the
        # original network is presumably the 4th layer from the end of the
        # wrapper (followed by one merge layer per output) -- TODO confirm.
        single_model = model.layers[-4]
    else:
        # No wrapper was applied, so the model itself is the single-device model.
        single_model = model
    single_model.save_weights('./best_avg_recall_single'+name+'.h5')
