In [1]:
import numpy as np
import pandas as pd
import bloscpack as bp

import imgaug as ia
import imgaug.augmenters as iaa

from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
from sklearn.model_selection import StratifiedKFold

from torch.utils.data.dataloader import DataLoader

import fastai
from fastai.vision import *
from fastai.callbacks import *

# from optim import Over9000
from torch.optim import Adam
from data import Bengaliai_DS
from callback_utils import SaveModelCallback
from mixup_fastai_utils import CmCallback, MuCmCallback
from loss import Loss_combine_weighted
from metric import Metric_grapheme, Metric_vowel, Metric_consonant, Metric_tot
from models_mg import Simple50GeM

---

In [2]:
SEED = 42


def seed_everything(seed):
    """Seed the RNGs used by this notebook and request deterministic cuDNN.

    Args:
        seed: Integer seed applied to Python's ``random`` module, the
            ``PYTHONHASHSEED`` environment variable, NumPy's global RNG and
            PyTorch (CPU generator and every CUDA device).

    Returns:
        None. The function only mutates global RNG / backend state.
    """
    # NOTE(review): `random` and `os` are not imported explicitly in this
    # notebook; they are presumably re-exported by `from fastai.vision import *`
    # -- confirm, or import them explicitly in the import cell.
    random.seed(seed)
    # Hash randomization is fixed at interpreter start-up, so this only takes
    # effect in child processes launched after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # cover every visible GPU, not just the current one
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may select different convolution algorithms from run
    # to run, which defeats the determinism requested above, so keep it off.
    torch.backends.cudnn.benchmark = False


seed_everything(SEED)

---
### data

In [3]:
# Training-time augmentation pipeline, assembled bottom-up. The augmenters are
# constructed in the same order as they would be in a single nested expression.

# Exactly one global warp: an affine transform (shrink / rotate / shear) or a
# perspective transform.
_global_warp = iaa.OneOf([
    iaa.Affine(
        scale={"x": (0.8, 1.), "y": (0.8, 1.)},
        rotate=(-15, 15),
        shear=(-15, 15),
    ),
    iaa.PerspectiveTransform(scale=.08, keep_size=True),
])

# One or two geometric distortions (global warp and/or local piecewise warp),
# applied in random order.
_geometric = iaa.SomeOf(
    (1, 2),
    [_global_warp, iaa.PiecewiseAffine(scale=.04)],
    random_order=True,
)

# Top level: between one and three of {geometric, edge overlay, JPEG artefacts},
# again in random order.
augs = iaa.SomeOf(
    (1, 3),
    [
        _geometric,
        iaa.DirectedEdgeDetect(alpha=(.6, .8), direction=(0.0, 1.0)),
        iaa.JpegCompression(compression=(90, 99)),
    ],
    random_order=True,
)


In [4]:
# Load the training label table. Later cells read its 'grapheme',
# 'grapheme_root', 'vowel_diacritic' and 'consonant_diacritic' columns.
pdf = pd.read_csv('../input/train.csv')

In [5]:
# Encode every distinct grapheme string as an integer id (order of first
# appearance) so it can be used as the stratification target.
unique_grapheme = pdf['grapheme'].unique()
grapheme_code = dict(zip(unique_grapheme, np.arange(unique_grapheme.shape[0])))
pdf['grapheme_code'] = pdf['grapheme'].map(grapheme_code)

# Stratified 5-fold split on the full grapheme; only the first fold is used.
# The fold seed is tied to SEED so the split and the training RNGs share one
# source of truth.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
trn_ndx, vld_ndx = next(skf.split(pdf['grapheme_code'], pdf['grapheme_code']))

# Chained (non in-place) reset keeps this cell free of mutation on a slice.
trn_pdf = pdf.iloc[trn_ndx].reset_index(drop=True)

imgs = bp.unpack_ndarray_from_file('../features/train_images_size168_pad4.bloscpack')
# The fold indices are positional and are applied to both `imgs` and the label
# array, so the two must be row-aligned.
assert len(imgs) == len(pdf), (
    'image array has %d rows but train.csv has %d' % (len(imgs), len(pdf))
)

# lbls = pdf.loc[:, ['grapheme_root', 'vowel_diacritic', 'consonant_diacritic', 'grapheme_code']].values
lbls = pdf.loc[:, ['grapheme_root', 'vowel_diacritic', 'consonant_diacritic']].values

trn_imgs, trn_lbls = imgs[trn_ndx], lbls[trn_ndx]
vld_imgs, vld_lbls = imgs[vld_ndx], lbls[vld_ndx]

In [6]:
def _seed_worker(worker_id):
    """Give each DataLoader worker process its own NumPy / random / imgaug seed.

    Worker processes can start with identical NumPy (and therefore imgaug)
    RNG state, which makes different workers produce the same "random"
    augmentations. PyTorch assigns every worker a distinct torch seed, so that
    seed is reused here for the other libraries.

    Args:
        worker_id: Index of the worker, supplied by ``DataLoader``; unused.
    """
    # Reduce to 31 bits so the value is accepted by np.random.seed and ia.seed.
    worker_seed = torch.initial_seed() % 2**31
    np.random.seed(worker_seed)
    random.seed(worker_seed)
    ia.seed(worker_seed)


# Augmentation is applied to the training split only.
training_set = Bengaliai_DS(trn_imgs, trn_lbls, transform=augs)
validation_set = Bengaliai_DS(vld_imgs, vld_lbls)

training_loader = DataLoader(
    training_set,
    batch_size=64,
    num_workers=6,
    shuffle=True,
    worker_init_fn=_seed_worker,
)  # , sampler=sampler
validation_loader = DataLoader(validation_set, batch_size=64, num_workers=6, shuffle=False)

data_bunch = DataBunch(train_dl=training_loader, valid_dl=validation_loader)

---
### model

In [7]:
# Number of classes for each of the three prediction targets.
n_grapheme, n_vowel, n_consonant = 168, 11, 7

# NOTE(review): the class counts above are not passed to the constructor;
# presumably Simple50GeM defines its own head sizes -- confirm they match.
classifier = Simple50GeM()

In [8]:
# Shared run name: used for the CSV training log here and for the saved
# checkpoint in the fit cell.
logging_name = 'Simple50GeM_AllMish_Augs_CmMu_1of7'

combined_loss = Loss_combine_weighted()
tracked_metrics = [Metric_grapheme(), Metric_vowel(), Metric_consonant(), Metric_tot()]

learn = Learner(
    data_bunch,
    classifier,
    loss_func=combined_loss,
    opt_func=Adam,
    metrics=tracked_metrics,
)

# Writes per-epoch metrics to a CSV file named after the run.
logger = CSVLogger(learn, logging_name)

# Disabled experiments, kept for reference:
# learn.clip_grad = 1.0
# learn.split([classifier.cls])

# Train every layer group.
learn.unfreeze()

In [9]:
# Callbacks for the run, constructed in the order they are passed to fit():
#   * logger                    -- CSV log created in the previous cell
#   * SaveModelCallback         -- checkpoint under `logging_name`, tracking 'metric_tot' (mode='max')
#   * ReduceLROnPlateauCallback -- halve the LR (factor .5) after 20 epochs of patience, floor 1e-5
#   * MuCmCallback              -- project callback from mixup_fastai_utils
fit_callbacks = [
    logger,
    SaveModelCallback(learn, monitor='metric_tot', mode='max', name=logging_name),
    ReduceLROnPlateauCallback(learn, patience=20, factor=.5, min_lr=1e-5),
    MuCmCallback(learn),
]

# 64 epochs at a constant base LR of 3e-4 with weight decay disabled.
learn.fit(64, lr=3e-4, wd=0., callbacks=fit_callbacks)

epoch,train_loss,valid_loss,metric_idx,metric_idx.1,metric_idx.2,metric_tot,time
0,1.83384,0.279665,0.897358,0.935268,0.919933,0.912479,12:52
1,1.662769,0.216145,0.922044,0.95384,0.946366,0.936073,12:47


KeyboardInterrupt: 