In [1]:
import numpy as np
import pandas as pd
import bloscpack as bp
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

import imgaug as ia
import imgaug.augmenters as iaa

In [2]:
import fastai
from fastai.vision import *

from optim import Over9000

In [3]:
from torch.utils.data.dataloader import DataLoader
from data import Bengaliai_DS, Balanced_Sampler

In [4]:
from model import *
from model_utils import *

---

In [5]:
SEED = 20190819

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(SEED)

---
### data

#### stratification

In [6]:
pdf = pd.read_csv('../input/train.csv')

In [11]:
pdf.head(3)

Unnamed: 0,image_id,grapheme_root,vowel_diacritic,consonant_diacritic,grapheme,grapheme_code
0,Train_0,15,9,5,ক্ট্রো,0
1,Train_1,159,0,0,হ,1
2,Train_2,22,3,5,খ্রী,2


In [8]:
unique_grapheme = pdf['grapheme'].unique()
grapheme_code = dict([(g, c) for g, c in zip(unique_grapheme, np.arange(unique_grapheme.shape[0]))])
pdf['grapheme_code'] = [grapheme_code[g] for g in pdf['grapheme']]

In [16]:
skf = MultilabelStratifiedKFold(n_splits=5)
for trn_ndx, vld_ndx in skf.split(pdf['image_id'].values.reshape(-1, 1), pdf.loc[:, ['grapheme_root', 'vowel_diacritic', 'consonant_diacritic']].values.reshape(-1, 3)):
    break

In [19]:
imgs = bp.unpack_ndarray_from_file('../features/train_images.bloscpack')
# see preprocess notebook, it's shown that the train.csv data sequence is the same and can just load like this.
lbls = pd.read_csv('../input/train.csv').iloc[:, 1:4].values

In [20]:
trn_imgs = imgs[trn_ndx]
trn_lbls = lbls[trn_ndx]

vld_imgs = imgs[vld_ndx]
vld_lbls = lbls[vld_ndx]

#### augmentation

In [21]:
augs = iaa.SomeOf(
    (0, 2),
    [
        iaa.SomeOf(
            (1, 2),
            [
                iaa.Affine(scale={"x": (0.8, 1.1), "y": (0.8, 1.1)}, rotate=(-15, 15), shear=(-15, 15)),
                iaa.PiecewiseAffine(scale=(0.02, 0.03)),
                iaa.PerspectiveTransform(scale=.08, keep_size=True),
            ],
            random_order=True
        ),
        iaa.OneOf(
            [
                iaa.DirectedEdgeDetect(alpha=(.6, .8), direction=(0.0, 1.0)),
                iaa.Emboss(alpha=(.5, 1.), strength=(.1, 4)),
            ]
        ),
    ],
    random_order=True
)
#         iaa.GaussianBlur(sigma=(0.5, 1.)),
#         iaa.OneOf(
#             [
#                 iaa.GaussianBlur(sigma=(0.5, 1.)),
#                 iaa.MotionBlur(k=(7, 13), angle=[-45, 45]),
#                 iaa.MedianBlur(k=(3, 5)),
#             ]
#         ),
#         iaa.CoarseDropout((0.05, 0.15), size_percent=(0.15, 0.35)),
#         iaa.Fog(),

In [22]:
training_set = Bengaliai_DS(trn_imgs, trn_lbls, transform=augs)
validation_set = Bengaliai_DS(vld_imgs, vld_lbls)

training_loader = DataLoader(training_set, batch_size=64, num_workers=6, shuffle=True)
validation_loader = DataLoader(validation_set, batch_size=64, num_workers=6, shuffle=False)

data_bunch = DataBunch(train_dl=training_loader, valid_dl=validation_loader)

---
### model

In [23]:
device = 'cuda:0'
n_grapheme = 168
n_vowel = 11
n_consonant = 7
n_total = n_grapheme + n_vowel + n_consonant

In [24]:
predictor = PretrainedCNN(out_dim=n_total)
classifier = BengaliClassifier(predictor)#.to(device)

In [25]:
learn = Learner(
    data_bunch,
    classifier,
    loss_func=Loss_combine(),
    opt_func=Over9000,
    metrics=[Metric_grapheme(), Metric_vowel(), Metric_consonant(), Metric_tot()]
)

logger = CSVLogger(learn, 'Seresnext_DensenetStarterSetup_Selectedaugs_Multilabelstrat_Mixup')

learn.clip_grad = 1.0
learn.split([classifier.predictor.lin_layers])
# learn.split([classifier.head1])
learn.unfreeze()

In [26]:
learn.fit_one_cycle(
    32,
    max_lr=slice(0.2e-2,1e-2),
    wd=[1e-3, 0.1e-1],
    pct_start=0.0,
    div_factor=100,
    callbacks=[logger, SaveModelCallback(learn, monitor='metric_tot', mode='max', name='Seresnext_DensenetStarterSetup_Selectedaugs_Multilabelstrat_Mixup'), MixUpCallback(learn)]
)

epoch,train_loss,valid_loss,metric_idx,metric_idx.1,metric_idx.2,metric_tot,time
0,1.373516,0.384431,0.868417,0.945251,0.879639,0.890431,14:50
1,1.20822,0.234619,0.920294,0.966577,0.960871,0.942009,14:39
2,1.102398,0.231342,0.926111,0.96947,0.955646,0.944334,14:50
3,1.019186,0.188522,0.935353,0.971179,0.96762,0.952376,14:45
4,0.964851,0.181622,0.943156,0.976579,0.966178,0.957267,14:44
5,0.905667,0.159249,0.945292,0.979725,0.97289,0.9608,14:49
6,0.881195,0.155759,0.950274,0.978197,0.974258,0.963251,14:43
7,0.812002,0.14841,0.952685,0.981153,0.97025,0.964193,14:49
8,0.784391,0.144856,0.952566,0.980385,0.973468,0.964746,14:40
9,0.75481,0.143758,0.951827,0.981711,0.978373,0.965935,14:42


Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/yuan/miniconda3/envs/ML/lib/python3.7/multiprocessing/queues.py", line 242, in _feed
    send_bytes(obj)
  File "/home/yuan/miniconda3/envs/ML/lib/python3.7/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/home/yuan/miniconda3/envs/ML/lib/python3.7/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/home/yuan/miniconda3/envs/ML/lib/python3.7/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
  File "/home/yuan/miniconda3/envs/ML/lib/python3.7/multiprocessing/queues.py", line 242, in _feed
    send_bytes(obj)
  File "/home/yuan/miniconda3/envs/ML/lib/python3.7/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/home/yuan/miniconda3/envs/ML/lib/python3.7/multiprocessing/co

KeyboardInterrupt: 