In [0]:
!pip install d2lzh  # installing d2l
!pip install -U --pre mxnet-cu101mkl  # updating mxnet to at least v1.6

In [0]:
import d2lzh as d2l
from mxnet import autograd, gluon, init
from mxnet.gluon import data as gdata, loss as gloss, nn
import os
import pandas as pd
import shutil
import time

In [0]:
import tarfile

In [0]:
# Unpack the dataset archive next to itself. The context manager guarantees
# the archive handle is closed even if extraction fails part-way through.
# NOTE(review): extractall() trusts the member paths stored in the archive;
# only use it on archives from a trusted source.
with tarfile.open("../cifar/cifar.tar") as tf:
    tf.extractall('../cifar/')

In [0]:
def read_label_file(data_dir, label_file, train_dir, valid_ratio):
    """Parse the label CSV and compute the per-label training quota.

    The first line of the CSV is treated as a header and skipped. Every other
    non-blank line must have the form ``<integer id>,<label>``. Blank lines
    (for example a trailing empty line at the end of the file) are ignored
    instead of crashing the parser.

    Args:
        data_dir: Directory that contains both the label file and `train_dir`.
        label_file: File name of the label CSV inside `data_dir`.
        train_dir: Name of the folder (inside `data_dir`) holding the training
            files; only the number of entries in it is used here.
        valid_ratio: Fraction of the training files to hold out for validation.

    Returns:
        A tuple ``(n_train_per_label, idx_label)`` where `n_train_per_label`
        is the number of files kept for training divided (integer division)
        by the number of distinct labels, and `idx_label` maps each integer
        id to its label string.

    Raises:
        AssertionError: If the resulting number of training files is not
            strictly between 0 and the total number of files.
        ValueError: If a non-blank line does not split into exactly two
            comma-separated fields, or its id is not an integer.
    """
    idx_label = {}
    with open(os.path.join(data_dir, label_file), 'r') as f:
        next(f, None)  # skip the header row (no-op on an empty file)
        for line in f:
            line = line.rstrip()
            if not line:
                # Tolerate blank / trailing empty lines in the CSV.
                continue
            idx, label = line.split(',')
            idx_label[int(idx)] = label
    labels = set(idx_label.values())
    n_train_valid = len(os.listdir(os.path.join(data_dir, train_dir)))
    n_train = int(n_train_valid * (1 - valid_ratio))
    assert 0 < n_train < n_train_valid
    return n_train // len(labels), idx_label

In [0]:
def mkdir_if_not_exist(path):
    """Create a directory (and any missing parents) if it is not there yet.

    Args:
        path: Sequence of path components; they are joined with
            ``os.path.join`` to form the directory to create.

    Uses ``exist_ok=True`` so the existence check and the creation happen in
    a single call, and calling this repeatedly for the same directory is a
    no-op.
    """
    os.makedirs(os.path.join(*path), exist_ok=True)

The functions below split the original training data into "train", "valid", and "train_valid" folders, and copy the test images into a "test" folder.

In [0]:
def reorg_train_valid(data_dir, train_dir, input_dir, n_train_per_label,
                      idx_label):
    """Copy every training file into per-label 'train_valid', 'train' and 'valid' folders.

    Each file in ``data_dir/train_dir`` is named ``<id>.<ext>``; its label is
    looked up in `idx_label` by the integer id. Every file is copied to
    ``data_dir/input_dir/train_valid/<label>``. In addition, the first
    `n_train_per_label` files encountered for a label go to
    ``.../train/<label>`` and the remaining ones to ``.../valid/<label>``.

    Args:
        data_dir: Root data directory.
        train_dir: Folder inside `data_dir` with the original training files.
        input_dir: Folder inside `data_dir` that receives the reorganized copies.
        n_train_per_label: Number of files per label to place in 'train'.
        idx_label: Mapping from integer file id to label string.
    """
    source_dir = os.path.join(data_dir, train_dir)
    kept_per_label = {}

    def copy_to_split(file_name, split, label):
        # Make sure the destination folder exists, then copy the file into it.
        mkdir_if_not_exist([data_dir, input_dir, split, label])
        shutil.copy(os.path.join(source_dir, file_name),
                    os.path.join(data_dir, input_dir, split, label))

    for file_name in os.listdir(source_dir):
        label = idx_label[int(file_name.split('.')[0])]
        copy_to_split(file_name, 'train_valid', label)
        quota_reached = (label in kept_per_label
                         and kept_per_label[label] >= n_train_per_label)
        if quota_reached:
            copy_to_split(file_name, 'valid', label)
        else:
            copy_to_split(file_name, 'train', label)
            kept_per_label[label] = kept_per_label.get(label, 0) + 1

In [0]:
def reorg_test(data_dir, test_dir, input_dir):
    """Copy all test files into ``data_dir/input_dir/test/unknown``.

    Args:
        data_dir: Root data directory.
        test_dir: Folder inside `data_dir` with the original test files.
        input_dir: Folder inside `data_dir` that receives the copies.
    """
    target_parts = [data_dir, input_dir, 'test', 'unknown']
    mkdir_if_not_exist(target_parts)
    destination = os.path.join(*target_parts)
    source_dir = os.path.join(data_dir, test_dir)
    for file_name in os.listdir(source_dir):
        shutil.copy(os.path.join(source_dir, file_name), destination)

In [0]:
def reorg_cifar10_data(data_dir, label_file, train_dir, test_dir, input_dir,
                       valid_ratio):
    """Reorganize the raw training and test folders under ``data_dir/input_dir``.

    Reads the label file to get the per-label training quota and the
    id-to-label mapping, then copies the training files into their split
    folders and the test files into the test folder.

    Args:
        data_dir: Root data directory.
        label_file: File name of the label CSV inside `data_dir`.
        train_dir: Folder inside `data_dir` with the original training files.
        test_dir: Folder inside `data_dir` with the original test files.
        input_dir: Folder inside `data_dir` that receives the reorganized copies.
        valid_ratio: Fraction of the training files to hold out for validation.
    """
    label_info = read_label_file(data_dir, label_file, train_dir, valid_ratio)
    per_label_quota, labels_by_id = label_info
    reorg_train_valid(data_dir, train_dir, input_dir, per_label_quota,
                      labels_by_id)
    reorg_test(data_dir, test_dir, input_dir)

In [0]:
# Set to True to run on the small 'train_tiny' / 'test_tiny' folders with a
# batch size of 1 instead of the full 'train' / 'test' folders.
demo = False

In [0]:
# Pick the source folders and batch size: the tiny sample for a quick demo,
# otherwise the full dataset.
train_dir, test_dir, batch_size = (
    ('train_tiny', 'test_tiny', 1) if demo else ('train', 'test', 128))

# Locations of the raw data and of the reorganized copy, plus the fraction of
# training files held out for validation.
data_dir = '../cifar/'
label_file = 'trainLabels.csv'
input_dir = 'train_valid_test'
valid_ratio = 0.1

reorg_cifar10_data(data_dir, label_file, train_dir, test_dir, input_dir,
                   valid_ratio)

In [0]:
# Augmentation pipeline applied to training images.
transform_train = gdata.vision.transforms.Compose([
    # Resize to 40 pixels before cropping.
    gdata.vision.transforms.Resize(40),
    # Random colour jitter.
    gdata.vision.transforms.RandomBrightness(0.5),
    gdata.vision.transforms.RandomHue(0.5),
    # Random square crop (aspect ratio fixed at 1) covering 64%-100% of the
    # area, resized to 32 pixels.
    gdata.vision.transforms.RandomResizedCrop(
        32, scale=(0.64, 1.0), ratio=(1.0, 1.0)),
    gdata.vision.transforms.RandomFlipLeftRight(),
    gdata.vision.transforms.ToTensor(),
    # Per-channel normalization; presumably the CIFAR-10 RGB mean / std.
    gdata.vision.transforms.Normalize(
        mean=[0.4914, 0.4822, 0.4465],
        std=[0.2023, 0.1994, 0.2010]),
])

In [0]:
# Deterministic pipeline for evaluation images: no augmentation, only tensor
# conversion and the same per-channel normalization as the training pipeline.
transform_test = gdata.vision.transforms.Compose([
    gdata.vision.transforms.ToTensor(),
    gdata.vision.transforms.Normalize(
        mean=[0.4914, 0.4822, 0.4465],
        std=[0.2023, 0.1994, 0.2010]),
])

In [0]:
# One ImageFolderDataset per reorganized split folder (flag=1 is passed
# through to the image loader unchanged for every split).
train_ds, valid_ds, train_valid_ds, test_ds = [
    gdata.vision.ImageFolderDataset(
        os.path.join(data_dir, input_dir, split), flag=1)
    for split in ('train', 'valid', 'train_valid', 'test')
]

In [0]:
# Batch iterators. Training-style loaders use the augmenting transform; the
# validation and test loaders use the plain one. The test loader is not
# shuffled, so predictions come out in dataset order. last_batch='keep'
# retains the final, possibly smaller, batch.
train_iter = gdata.DataLoader(
    train_ds.transform_first(transform_train),
    batch_size=batch_size, shuffle=True, last_batch='keep')
valid_iter = gdata.DataLoader(
    valid_ds.transform_first(transform_test),
    batch_size=batch_size, shuffle=True, last_batch='keep')
train_valid_iter = gdata.DataLoader(
    train_valid_ds.transform_first(transform_train),
    batch_size=batch_size, shuffle=True, last_batch='keep')
test_iter = gdata.DataLoader(
    test_ds.transform_first(transform_test),
    batch_size=batch_size, shuffle=False, last_batch='keep')

Define the ResNet-18 model.

In [0]:
class Residual(nn.HybridBlock):
    """Residual block: two 3x3 convolutions with batch norm plus a skip connection.

    Args:
        num_channels: Number of output channels of every convolution in the block.
        use_1x1conv: If True, the skip path is passed through a 1x1 convolution
            (using `strides`) before being added; otherwise the input is
            added unchanged.
        strides: Stride of the first 3x3 convolution and of the optional
            1x1 convolution.
        **kwargs: Forwarded to ``nn.HybridBlock``.
    """

    def __init__(self, num_channels, use_1x1conv=False, strides=1, **kwargs):
        super().__init__(**kwargs)
        # Layers are created in the same order as before so that child
        # registration order is unchanged.
        self.conv1 = nn.Conv2D(num_channels, kernel_size=3, padding=1,
                               strides=strides)
        self.conv2 = nn.Conv2D(num_channels, kernel_size=3, padding=1)
        self.conv3 = (nn.Conv2D(num_channels, kernel_size=1, strides=strides)
                      if use_1x1conv else None)
        self.bn1 = nn.BatchNorm()
        self.bn2 = nn.BatchNorm()

    def hybrid_forward(self, F, X):
        """Return relu(main_path(X) + skip_path(X))."""
        main = F.relu(self.bn1(self.conv1(X)))
        main = self.bn2(self.conv2(main))
        shortcut = X if self.conv3 is None else self.conv3(X)
        return F.relu(main + shortcut)

In [0]:
def resnet18(num_classes):
    """Build a ResNet-18 style network with a 3x3 stem convolution.

    The network is a stem (3x3 conv, batch norm, ReLU), four stages of two
    residual blocks each (64, 128, 256 and 512 channels), global average
    pooling, and a dense output layer.

    Args:
        num_classes: Number of units in the final dense layer.

    Returns:
        An uninitialized ``nn.HybridSequential`` network.
    """
    def resnet_block(num_channels, num_residuals, first_block=False):
        # A stage of `num_residuals` residual blocks. Except in the first
        # stage, the leading block uses stride 2 with a 1x1 conv on the skip path.
        stage = nn.HybridSequential()
        for position in range(num_residuals):
            downsample = position == 0 and not first_block
            if downsample:
                stage.add(Residual(num_channels, use_1x1conv=True, strides=2))
            else:
                stage.add(Residual(num_channels))
        return stage

    net = nn.HybridSequential()
    net.add(nn.Conv2D(64, kernel_size=3, strides=1, padding=1),
            nn.BatchNorm(), nn.Activation('relu'))

    stage_specs = ((64, True), (128, False), (256, False), (512, False))
    for channels, is_first in stage_specs:
        net.add(resnet_block(channels, 2, first_block=is_first))

    net.add(nn.GlobalAvgPool2D(), nn.Dense(num_classes))
    return net

Getting Network

In [0]:
def get_net(ctx, num_classes=10):
    """Create a ResNet-18 network and initialize it on `ctx` with Xavier init.

    Args:
        ctx: Device context on which the parameters are initialized.
        num_classes: Number of output classes. Defaults to 10, the value that
            was previously hard-coded, so existing ``get_net(ctx)`` calls
            behave exactly as before.

    Returns:
        The initialized network.
    """
    net = resnet18(num_classes)
    net.initialize(ctx=ctx, init=init.Xavier())
    return net

# Softmax cross-entropy loss object; train() reads it as a module-level global.
loss = gloss.SoftmaxCrossEntropyLoss()

Training Function

In [0]:
def train(net, train_iter, valid_iter, num_epochs, lr, wd, ctx, lr_period,
          lr_decay):
    """Train `net` with SGD (momentum 0.9) and a step-wise learning-rate decay.

    After every epoch one summary line is printed with the average training
    loss, the training accuracy, the validation accuracy (if `valid_iter` is
    given), the epoch's training time and the current learning rate.

    Args:
        net: Network to train; must already be initialized on `ctx`.
        train_iter: Iterable yielding ``(X, y)`` training batches.
        valid_iter: Iterable of validation batches, or None to skip validation.
        num_epochs: Number of passes over `train_iter`.
        lr: Initial learning rate.
        wd: Weight decay.
        ctx: Device context on which batches are processed.
        lr_period: The learning rate is multiplied by `lr_decay` at every
            epoch index that is a positive multiple of this value.
        lr_decay: Multiplicative learning-rate decay factor.

    Relies on the module-level `loss` object.
    """
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': lr, 'momentum': 0.9, 'wd': wd})
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        if epoch > 0 and epoch % lr_period == 0:
            trainer.set_learning_rate(trainer.learning_rate * lr_decay)
        for X, y in train_iter:
            y = y.astype('float32').as_in_context(ctx)
            with autograd.record():
                y_hat = net(X.as_in_context(ctx))
                l = loss(y_hat, y).sum()
            l.backward()
            # The loss is summed over the batch, so normalize the gradient by
            # the number of examples actually in this batch. Using the
            # notebook-global batch size would under-weight a smaller final
            # batch and tie this function to a hidden global.
            trainer.step(X.shape[0])
            train_l_sum += l.asscalar()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
            n += y.size
        # Measured before validation, so it covers the training pass only.
        time_s = "time %.2f sec" % (time.time() - start)
        if valid_iter is not None:
            valid_acc = d2l.evaluate_accuracy(valid_iter, net, ctx)
            epoch_s = ("epoch %d, loss %f, train acc %f, valid acc %f, "
                       % (epoch + 1, train_l_sum / n, train_acc_sum / n,
                          valid_acc))
        else:
            epoch_s = ("epoch %d, loss %f, train acc %f, " %
                       (epoch + 1, train_l_sum / n, train_acc_sum / n))
        print(epoch_s + time_s + ', lr ' + str(trainer.learning_rate))

Train the model on the "train" split, using the separate "valid" split for validation.

In [0]:
# Device and optimization hyperparameters shared by the training runs below.
ctx = d2l.try_gpu()
num_epochs, lr, wd = 200, 0.1, 1e-4
# Passed to train() as the learning-rate decay period (in epochs) and factor.
lr_period, lr_decay = 90, 0.1

# Optional run with validation on the held-out split; uncomment to enable.
# net = get_net(ctx)
# net.hybridize()
# train(net, train_iter, valid_iter, num_epochs, lr, wd, ctx, lr_period,
#       lr_decay)

Train on the combined train_valid dataset, predict labels for the test dataset, and write the predictions to a submissions.csv file.

In [26]:
# Fresh network trained on the combined train_valid data, without validation.
preds = []
net = get_net(ctx)
net.hybridize()
train(net, train_valid_iter, None, num_epochs, lr, wd, ctx, lr_period,
      lr_decay)

epoch 1, loss 2.392159, train acc 0.149960, time 58.49 sec, lr 0.1
epoch 2, loss 1.899275, train acc 0.308140, time 56.06 sec, lr 0.1
epoch 3, loss 1.655584, train acc 0.401540, time 56.16 sec, lr 0.1
epoch 4, loss 1.414079, train acc 0.494260, time 56.21 sec, lr 0.1
epoch 5, loss 1.176829, train acc 0.586040, time 56.41 sec, lr 0.1
epoch 6, loss 0.981766, train acc 0.655420, time 56.42 sec, lr 0.1
epoch 7, loss 0.840443, train acc 0.706300, time 56.13 sec, lr 0.1
epoch 8, loss 0.742872, train acc 0.740880, time 56.39 sec, lr 0.1
epoch 9, loss 0.669286, train acc 0.767320, time 56.75 sec, lr 0.1
epoch 10, loss 0.619769, train acc 0.784840, time 56.96 sec, lr 0.1
epoch 11, loss 0.565232, train acc 0.803480, time 55.97 sec, lr 0.1
epoch 12, loss 0.523412, train acc 0.819020, time 56.00 sec, lr 0.1
epoch 13, loss 0.494675, train acc 0.828140, time 56.22 sec, lr 0.1
epoch 14, loss 0.460901, train acc 0.839180, time 56.13 sec, lr 0.1
epoch 15, loss 0.434155, train acc 0.849100, time 56.30 s

In [0]:
# Start from an empty list so that re-running this cell does not append a
# second copy of the predictions (which would make the column lengths differ
# and break the DataFrame construction below).
preds = []
for X, _ in test_iter:
    y_hat = net(X.as_in_context(ctx))
    preds.extend(y_hat.argmax(axis=1).astype(int).asnumpy())

# Ids 1..N ordered as strings ('1', '10', '100', ...). Presumably this matches
# the order in which ImageFolderDataset enumerates the test files; confirm
# against the dataset's file ordering.
sorted_ids = sorted(range(1, len(test_ds) + 1), key=str)

# Map each predicted class index to its label name and build the table in
# one step instead of rewriting the column afterwards.
df = pd.DataFrame({
    'id': sorted_ids,
    'label': [train_valid_ds.synsets[int(p)] for p in preds],
})

In [0]:
# Write the predictions to disk without the DataFrame index column.
df.to_csv('../cifar/submissions.csv', index=False)