In [1]:
import numpy as np
import pandas as pd
import bloscpack as bp

from sklearn.model_selection import StratifiedKFold

import imgaug as ia
import imgaug.augmenters as iaa

from torch.utils.data.dataloader import DataLoader

import fastai
from fastai.vision import *

from optim import Over9000
from data import Bengaliai_DS, Bengaliai_DS_LIT
from model import *
from model_utils import *
from utils import *

---

In [2]:
SEED = 42

def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global RNG and PyTorch (CPU and
    every CUDA device), exports ``PYTHONHASHSEED``, and configures cuDNN for
    reproducible runs.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # PYTHONHASHSEED is read at interpreter start-up, so this only affects
    # processes launched after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed all GPUs, not just the current one; silently ignored without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN auto-tune and pick different kernels from run
    # to run, which undoes the determinism requested above -- keep it off.
    torch.backends.cudnn.benchmark = False

seed_everything(SEED)

---
### data

In [3]:
# Read the training metadata once; the fold split and the label matrix are
# both derived from this single frame.
pdf = pd.read_csv('../input/train.csv')

# Give every distinct grapheme an integer code, numbered in order of first
# appearance, and attach it as a new (last) column.
unique_grapheme = pdf['grapheme'].unique()
grapheme_code = dict(zip(unique_grapheme, np.arange(unique_grapheme.shape[0])))
pdf['grapheme_code'] = pdf['grapheme'].map(grapheme_code)

# Stratify on the grapheme code and keep only the first of the 7 folds.
skf = StratifiedKFold(n_splits=7, shuffle=True, random_state=42)
trn_ndx, vld_ndx = next(iter(skf.split(pdf['grapheme_code'], pdf['grapheme_code'])))

imgs = bp.unpack_ndarray_from_file('../features/train_images_size168_pad6.bloscpack')
# Columns 1..3 of train.csv (presumably the three target columns -- confirm
# against the CSV header). 'grapheme_code' was appended after them, so the
# positional slice is unaffected.
lbls = pdf.iloc[:, 1:4].values

# Images and labels are indexed with the same fold indices below, so they
# must be row-aligned.
assert len(imgs) == len(lbls), (
    f'image/label count mismatch: {len(imgs)} images vs {len(lbls)} label rows'
)

trn_imgs = imgs[trn_ndx]
trn_lbls = lbls[trn_ndx]
vld_imgs = imgs[vld_ndx]
vld_lbls = lbls[vld_ndx]

In [4]:
# Keep the batch size at 64: the fit_one_cycle arguments are probably tuned
# for this batch size.
batch_size = 64

# Only the validation pipeline is built in this cell; the training-side
# equivalents are kept below, commented out.
# training_set = Bengaliai_DS(trn_imgs, trn_lbls, transform=augs)
# training_loader = DataLoader(training_set, batch_size=batch_size, num_workers=6, shuffle=True) # , sampler=sampler , shuffle=True
# data_bunch = DataBunch(train_dl=training_loader, valid_dl=validation_loader)

validation_set = Bengaliai_DS(vld_imgs, vld_lbls)

# shuffle=False keeps batches in dataset order.
validation_loader = DataLoader(
    dataset=validation_set,
    batch_size=batch_size,
    shuffle=False,
    num_workers=6,
)

---
### model

In [5]:
# Device string for the first CUDA GPU.
device = 'cuda:0'

# Sizes of the three output groups (presumably the class counts of the
# grapheme-root, vowel and consonant targets -- confirm against the dataset).
n_grapheme, n_vowel, n_consonant = 168, 11, 7

# Combined width of the three groups.
n_total = sum((n_grapheme, n_vowel, n_consonant))

In [6]:
# Build the network with n_total outputs and wrap it in BengaliClassifier.
# Both classes arrive through the star imports above (presumably from the
# project's `model` module -- confirm).
predictor = PretrainedCNN(out_dim=n_total)
classifier = BengaliClassifier(predictor)

In [7]:
# map_location='cpu' keeps the checkpoint loadable even if it was saved from a
# GPU that is not available here; load_state_dict then copies the values into
# the model's own parameters.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
weight = torch.load(
    './outputs/Seresnext50_Size168_BestAug_Nosplitmodel_Nowd_CmMu_1of7.pth',
    map_location='cpu',
)
classifier.load_state_dict(weight)

<All keys matched successfully>

In [36]:
# Per-batch accumulators: dataset indices, predicted classes, ground truth.
# NOTE: `lbls` is rebound here; until this cell runs it refers to the full
# label array created in the data cell.
ndcs = []
preds = []
lbls = []

# Switch to inference behaviour (e.g. BatchNorm running statistics, Dropout
# disabled) before predicting; a freshly built module starts in training mode.
classifier.eval()

with torch.no_grad():
    for i, (img, lbl) in enumerate(validation_loader):
        # The final batch can be shorter than batch_size, so size the index
        # range from the batch itself rather than assuming a full batch.
        start = i * batch_size
        ndcs.append(np.arange(start, start + len(img)))
        # One argmax per model output, stacked column-wise per sample.
        preds.append(
            np.stack([a.detach().cpu().numpy().argmax(1) for a in classifier(img)], axis=1).astype('uint8')
        )
        lbls.append(lbl.numpy().astype('uint8'))
        display_progress(i+1, len(validation_loader))

AttributeError: 'numpy.ndarray' object has no attribute 'append'

In [35]:
# Number of batches the validation loader yields.
len(validation_loader)

449