In [1]:
import os, sys, math, io
import numpy as np
import pandas as pd
import multiprocessing as mp
import bson
import struct
from PIL import Image
import time
import shutil

%matplotlib inline
import matplotlib.pyplot as plt

from collections import defaultdict
from tqdm import *

In [2]:
# loading PyTorch
import torch
import torch.nn as nn
from torch.nn import init
from torch.autograd import Variable
import torchvision
import torchvision.transforms as T
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler
from torch.utils.data import Dataset

In [3]:
# Load the CSV tables written by the earlier raw-data study; the dataset class
# and the category lookup lists built further down read from these frames.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
RawDataStudy_dir = '/media/hua/HuaSSD/KaggleData/CdiscountImageClassificationChallenge/RawDataStudy/'
# Indexed by the first CSV column; BSONIterator looks rows up by product_id and
# reads their "offset" and "length" fields.
train_offsets_df = pd.read_csv( RawDataStudy_dir + 'train_offsets.csv', index_col=0)
# One row per training image (product_id, category_idx, img_idx, freqs). Read
# without index_col, so the saved index shows up as an "Unnamed: 0" column.
train_images_df = pd.read_csv( RawDataStudy_dir + 'train_freqs.csv')
# Validation image table -- presumably the same per-image columns as the
# training table; verify against the CSV.
val_images_df = pd.read_csv( RawDataStudy_dir + 'val_images_all.csv', index_col=0)
# Category table; its 'category_level1' / 'category_level2' columns are used below.
categories_df = pd.read_csv( RawDataStudy_dir + 'categories_name_to_id.csv')

  mask |= (ar1 == a)


In [4]:
train_images_df.head()

Unnamed: 0,product_id,category_idx,img_idx,freqs
0,0,5055,0,61688
1,1,5055,0,61688
2,5,5055,0,61688
3,11,5055,0,61688
4,16,5055,0,61688


In [5]:
train_images_df.tail()

Unnamed: 0,product_id,category_idx,img_idx,freqs
12129136,19028368,5101,2,13
12129137,20643558,5101,0,13
12129138,20643558,5101,1,13
12129139,20643558,5101,2,13
12129140,20643558,5101,3,13


In [6]:
# Positional lookup lists: element i holds the level-1 / level-2 id stored in
# row i of categories_df. BSONIterator indexes them with an image's category_idx.
idx2l1 = list(categories_df['category_level1'])
idx2l2 = list(categories_df['category_level2'])
# Number of rows in train_offsets_df per category_id (not referenced by the
# cells visible in this notebook).
frequencies = train_offsets_df['category_id'].value_counts()

In [7]:
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
data_dir = "/media/hua/HuaSSD/KaggleData/CdiscountImageClassificationChallenge/"
train_bson_path = os.path.join(data_dir, "train.bson")
# Opened in binary mode and deliberately left open: the datasets built below
# seek/read this handle on every item. It is never closed in this notebook.
train_bson_file = open(train_bson_path, "rb")

In [8]:
class BSONIterator(Dataset):
    """Map-style dataset that decodes a single product image per index from a BSON file.

    Each index selects a row of ``images_df`` (columns ``product_id``,
    ``img_idx`` and ``category_idx`` are read); the product's byte range is
    taken from ``offsets_df`` (looked up by ``product_id``, fields ``offset``
    and ``length``) and read from ``bson_file`` with seek/read.

    ``mode`` controls what ``__getitem__`` returns:
      * ``'train'``: ``(x, target1, target2, target3)``
      * ``'valid'``: ``(x, [target1]*10, [target2]*10, [target3]*10)``
      * anything else: ``x`` only
    where the targets are one-element ``LongTensor``s holding the level-1 id,
    the level-2 id and the category index.
    """

    def __init__(self, bson_file, images_df, offsets_df, transform, mode = 'train'):
        super().__init__()
        self.file, self.transform, self.mode = bson_file, transform, mode
        self.images_df = images_df
        self.offsets_df = offsets_df

    def __len__(self):
        """Number of rows in the image table."""
        return len(self.images_df)

    def __getitem__(self, idx):
        """Decode, transform and label the image described by row ``idx``."""
        row = self.images_df.iloc[idx]
        location = self.offsets_df.loc[row["product_id"]]

        # Jump straight to this product's record inside the BSON file.
        self.file.seek(location["offset"])
        record = bson.BSON.decode(self.file.read(location["length"]))

        # A product can carry several pictures; pick the one this row refers to.
        picture_bytes = record["imgs"][row["img_idx"]]["picture"]
        x = self.transform(Image.open(io.BytesIO(picture_bytes)))

        # Labels: the category index plus its level-1 / level-2 ids from the
        # notebook-level lookup lists.
        category = int(row["category_idx"])
        target1 = torch.LongTensor([int(idx2l1[category])])
        target2 = torch.LongTensor([int(idx2l2[category])])
        target3 = torch.LongTensor([category])

        if self.mode == 'train':
            # Targets are tensors for the sake of pin_memory and async transfer.
            return x, target1, target2, target3
        elif self.mode == 'valid':
            return x, [target1]*10, [target2]*10, [target3]*10
        return x

In [9]:
# Per-channel normalization constants: the standard ImageNet statistics that
# torchvision's pretrained models are documented to expect.
mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
# Training: random horizontal flip as the only augmentation, then tensor + normalize.
transform_train = T.Compose([T.RandomHorizontalFlip(), 
                             T.ToTensor(),T.Normalize(mean=mean, std=std)])
# Validation: deterministic -- tensor conversion and normalization only.
transform_val = T.Compose([T.ToTensor(),T.Normalize(mean=mean, std=std)])

Create the training dataset and the validation dataset (both read from the same `train.bson` file).

In [10]:
# Both datasets read from the same open train.bson handle and the same offsets
# table; only the image table and the transform differ.
train_gen = BSONIterator(train_bson_file, train_images_df, train_offsets_df, transform_train, mode = 'train')
# mode='train' here as well, so each item is (x, target1, target2, target3) --
# the tuple shape that validate() unpacks.
val_gen = BSONIterator(train_bson_file, val_images_df, train_offsets_df, transform_val, mode = 'train')

In [11]:
print(len(train_gen), len(val_gen))

12129141 242152


In [12]:
# cutoff = 1000
# freqs = train_images_df['freqs']
# weights = np.where(freqs>cutoff, 1.0, 2.0)
# num_samples = len(train_gen)

In [13]:
batch_size = 128
# Training batches are drawn in random order; pin_memory pairs with the
# asynchronous host-to-GPU copies done in the training loop.
# NOTE(review): both datasets share one open file handle and do seek()+read()
# per item; presumably a single worker keeps those pairs from interleaving --
# confirm before raising num_workers.
loader_train = DataLoader(train_gen, batch_size=batch_size, 
                          sampler=sampler.RandomSampler(train_gen), 
                          num_workers=1, pin_memory = True)
# Validation batches are read in table order.
loader_val = DataLoader(val_gen, batch_size=batch_size, sampler=sampler.SequentialSampler(val_gen), 
                        num_workers=1, pin_memory = True)

In [14]:
print(len(loader_train), len(loader_val))

94759 1892


In [15]:
# itr = iter(loader_train)

In [16]:
# img, target1, target2, target3 = next(itr)

In [17]:
# img.size(), target1.size(), target2.size(), target3.size()

In [18]:
# itr = iter(loader_val)

In [19]:
# img, target1, target2, target3 = next(itr)

In [20]:
# img.size(), target1.size(), target2.size(), target3.size()

# Training

**ResNet101**

In [21]:
# Start from torchvision's pretrained ResNet-101.
model = torchvision.models.resnet101(pretrained=True)
# Replace the pooling stage with a 6x6 average pool followed by dropout.
# NOTE(review): kernel_size=6 presumably matches the final feature-map size
# for this dataset's image resolution -- confirm.
# Original author's note: dropout 0.5 is comparable to the ten-crop method with 4x4/6x6.
model.avgpool = nn.Sequential(nn.AvgPool2d(kernel_size = 6), nn.Dropout(p=0.5,inplace=True))
# One linear head with 49 + 483 + 5270 = 5802 outputs; the training and
# validation code slices it as [:49], [49:532] and [532:] for the three label levels.
model.fc = nn.Linear(in_features=2048, out_features=49 + 483 + 5270)

In [22]:
# Directory (on the SSD) where trained checkpoints are stored.
model_dir = '/media/hua/HuaSSD/KaggleData/CdiscountImageClassificationChallenge/ModelResNet/' #model saved in SSD
# Checkpoint to resume from; update this line whenever a newer checkpoint
# should be used. The previous choice is kept commented out for reference.
#trained_model = model_dir + 'ResNet101_L4L5_4Epoch_DO05_W111_20171204.pth.tar'
trained_model = model_dir + 'ResNet101_L4L5_6Epoch_lrneg3_DO05_W111_20171208.pth.tar'

def load_model(model, trained_model):
    """Load weights from a checkpoint file into ``model``.

    Args:
        model: object exposing ``load_state_dict`` (e.g. an ``nn.Module``).
        trained_model: path of a checkpoint written with ``torch.save`` whose
            ``'state_dict'`` entry holds the weights.

    Returns:
        ``model`` itself: with the checkpoint weights loaded when the file
        exists, otherwise untouched (a message is printed instead).
    """
    if not os.path.isfile(trained_model):
        print("=> no checkpoint found at '{}'".format(trained_model))
        # Hand the model back unchanged so that `model = load_model(...)`
        # never rebinds the caller's model to None.
        return model

    print("=> loading checkpoint '{}'".format(trained_model))
    # torch.load unpickles the file; only point it at checkpoints you trust.
    checkpoint = torch.load(trained_model)
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}'".format(trained_model))
    return model

In [23]:
# Restore the weights from the checkpoint selected above into the ResNet-101.
model = load_model(model, trained_model)

=> loading checkpoint '/media/hua/HuaSSD/KaggleData/CdiscountImageClassificationChallenge/ModelResNet/ResNet101_L4L5_6Epoch_lrneg3_DO05_W111_20171208.pth.tar'
=> loaded checkpoint '/media/hua/HuaSSD/KaggleData/CdiscountImageClassificationChallenge/ModelResNet/ResNet101_L4L5_6Epoch_lrneg3_DO05_W111_20171208.pth.tar'


In [24]:
# Freeze the stem and the first three residual stages. layer4 and the replaced
# fc head are not in this list, and the optimizer is later built only from
# parameters that still have requires_grad set.
for layer in [model.conv1, model.bn1, model.relu, model.maxpool, model.layer1, model.layer2, model.layer3]:
    for param in layer.parameters():
        param.requires_grad = False
# Move the model to the GPU; as the last expression of the cell this also
# echoes the module repr in the notebook output.
model.cuda()

ResNet (
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
  (relu): ReLU (inplace)
  (maxpool): MaxPool2d (size=(3, 3), stride=(2, 2), padding=(1, 1), dilation=(1, 1))
  (layer1): Sequential (
    (0): Bottleneck (
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
      (relu): ReLU (inplace)
      (downsample): Sequential (
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
      )
    )
    (1): Bott

In [25]:
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """Serialize ``state`` to ``filename`` and, for a new best, mirror it.

    Args:
        state: object handed to ``torch.save``.
        is_best: when truthy, the freshly written file is additionally copied
            to ``'model_best.pth.tar'`` in the current working directory.
        filename: destination of the regular checkpoint.
    """
    torch.save(state, filename)
    if not is_best:
        return
    shutil.copyfile(filename, 'model_best.pth.tar')


class AverageMeter(object):
    """Keeps the most recent value of a metric plus its running weighted mean.

    Attributes:
        val: value passed to the latest ``update`` call.
        sum: weighted sum of all values seen since the last ``reset``.
        count: total weight seen since the last ``reset``.
        avg: ``sum / count``.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Set every statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the latest value, counted with weight ``n``."""
        self.val = val
        self.sum += val * n
        self.count += n
        # Refresh the mean from the accumulated totals.
        self.avg = self.sum / self.count


def adjust_learning_rate(lr, optimizer, epoch, denominator = 2):
    """Apply a step decay to every parameter group of ``optimizer``.

    The rate written is ``lr * 0.1 ** (epoch // denominator)``: the base
    rate shrinks by a factor of 10 each time ``denominator`` more epochs have
    elapsed. Nothing is returned; the groups are updated in place.
    """
    decayed_lr = lr * (0.1 ** (epoch // denominator))
    for group in optimizer.param_groups:
        group['lr'] = decayed_lr

def accuracy(output, target, topk=(1,)):
    """Compute precision@k (in percent) for each requested ``k``.

    Args:
        output: 2-D score tensor, one row per sample and one column per class.
        target: class indices, one per sample (any shape that flattens to
            ``batch`` elements).
        topk: iterable of ``k`` values.

    Returns:
        A list with one single-element float tensor per entry of ``topk``:
        the percentage of samples whose target is among the ``k`` highest
        scoring classes.
    """
    batch_size = target.size(0)
    # Never request more candidates than there are classes.
    maxk = min(max(topk), output.size(1))

    # Indices of the maxk best classes per sample, best first, transposed to
    # (maxk, batch) so that slicing the first k rows yields the top-k hits.
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.contiguous().view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # A sample matches at most one of its k candidates, so the flattened
        # sum counts the samples that were hit.
        correct_k = correct[:k].contiguous().view(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res

In [26]:
def train(train_loader, model, criterion, optimizer, weights, epoch, print_freq = 50):
    """Run one training epoch over ``train_loader`` on the GPU.

    The model emits one logit vector per image that is split into three heads:
    columns ``[:49]`` (level 1), ``[49:532]`` (level 2) and ``[532:]``
    (category). The loss is the ``weights``-weighted sum of ``criterion`` on
    the three heads.

    Args:
        train_loader: yields ``(img, target1, target2, target3)`` batches.
        model: network to optimize; switched to train mode.
        criterion: loss applied to each head.
        optimizer: optimizer stepped once per batch.
        weights: three loss weights ``[w_level1, w_level2, w_category]``. The
            reported Prec@1 is taken on the category head if ``weights[2] > 0``,
            else on level 2 if ``weights[1] > 0``, else on level 1.
        epoch: epoch number, used only in the progress line.
        print_freq: print a progress line every ``print_freq`` batches.

    Returns:
        ``(loss_log, acc_log)``: per-batch loss and Prec@1 values as lists of
        Python floats.
    """
    # Column boundaries of the three heads inside the logit vector.
    l1_end, l2_end = 49, 532

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    loss_log = []
    acc_log = []

    # switch to train mode
    model.train()

    end = time.time()
    for i, (img, target1, target2, target3) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # non_blocking=True requests an asynchronous host-to-GPU copy (effective
        # with pinned loader memory). The older spelling `async=True` cannot be
        # used: `async` is a reserved word since Python 3.7.
        img = img.cuda(non_blocking=True)
        target1 = target1.view(-1).cuda(non_blocking=True)
        target2 = target2.view(-1).cuda(non_blocking=True)
        target3 = target3.view(-1).cuda(non_blocking=True)

        # compute output
        output = model(img)
        loss1 = criterion(output[:, :l1_end], target1)
        loss2 = criterion(output[:, l1_end:l2_end], target2)
        loss3 = criterion(output[:, l2_end:], target3)
        loss = loss1*weights[0] + loss2*weights[1] + loss3*weights[2]

        # measure accuracy of the selected head on logits detached from the graph
        scores = output.detach()
        if weights[2]>0:
            prec1 = accuracy(scores[:, l2_end:], target3, topk=(1, ))[0]#only need top1
        elif weights[1]>0:
            prec1 = accuracy(scores[:, l1_end:l2_end], target2, topk=(1, ))[0]
        else:
            prec1 = accuracy(scores[:, :l1_end], target1, topk=(1, ))[0]

        # .item() extracts the Python float from a one-element tensor.
        losses.update(loss.item(), img.size(0))
        top1.update(prec1.item(), img.size(0))
        loss_log.append(losses.val)
        acc_log.append(top1.val)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                   epoch, i, len(train_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses, top1=top1))
    return loss_log, acc_log

In [27]:
def validate(val_loader, model, weights, print_freq=20):
    """Measure Prec@1 of ``model`` over ``val_loader`` on the GPU.

    Args:
        val_loader: yields ``(img, target1, target2, target3)`` batches.
        model: network to evaluate; switched to eval mode.
        weights: three head weights ``[w_level1, w_level2, w_category]``. They
            only select which head is scored: the category head (``[532:]``)
            if ``weights[2] > 0``, else level 2 (``[49:532]``) if
            ``weights[1] > 0``, else level 1 (``[:49]``).
        print_freq: print a progress line every ``print_freq`` batches.

    Returns:
        Sample-weighted average Prec@1 (percent) over the whole loader.
    """
    # Column boundaries of the three heads inside the logit vector.
    l1_end, l2_end = 49, 532

    batch_time = AverageMeter()
    top1 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    # Inference only: no_grad() keeps autograd from recording the forward pass
    # (the replacement for the removed `volatile=True` Variable flag).
    with torch.no_grad():
        for i, (img, target1, target2, target3) in enumerate(val_loader):

            # non_blocking=True replaces the old `async=True` keyword, which is
            # a syntax error since Python 3.7.
            img = img.cuda(non_blocking=True)
            target1 = target1.view(-1).cuda(non_blocking=True)
            target2 = target2.view(-1).cuda(non_blocking=True)
            target3 = target3.view(-1).cuda(non_blocking=True)

            # compute output
            output = model(img)

            # measure accuracy of the selected head
            if weights[2]> 0:
                prec1 = accuracy(output[:, l2_end:], target3, topk=(1, ))[0]#only need top1
            elif weights[1]>0:
                prec1 = accuracy(output[:, l1_end:l2_end], target2, topk=(1, ))[0]
            else:
                prec1 = accuracy(output[:, :l1_end], target1, topk=(1, ))[0]
            # .item() extracts the Python float from a one-element tensor.
            top1.update(prec1.item(), img.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                       i, len(val_loader), batch_time=batch_time, top1=top1))

    print(' * Prec@1 {top1.avg:.3f}'.format(top1=top1))

    return top1.avg

In [28]:
def validate_average(val_loader, model, weights, n = 10, print_freq=80):
    """Measure Prec@1 after averaging the logits of ``n`` consecutive rows.

    The model output is reshaped to ``(-1, n, num_outputs)`` and averaged over
    the ``n`` axis; each target tensor is reshaped to ``(-1, n)`` and its first
    column is kept as the label of the group.
    NOTE(review): this assumes the loader delivers ``n`` consecutive views
    (e.g. crops) of every sample with tensor targets -- confirm against the
    dataset/transform actually used with this function.

    Args:
        val_loader: yields ``(img, target1, target2, target3)`` batches.
        model: network to evaluate; switched to eval mode.
        weights: three head weights; they only select which head is scored
            (category ``[532:]`` if ``weights[2] > 0``, else level 2
            ``[49:532]`` if ``weights[1] > 0``, else level 1 ``[:49]``).
        n: number of consecutive rows averaged per sample.
        print_freq: print a progress line every ``print_freq`` batches.

    Returns:
        Average Prec@1 (percent) over the loader, each batch weighted by
        ``img.size(0)``.
    """
    # Column boundaries of the three heads inside the logit vector.
    l1_end, l2_end = 49, 532

    batch_time = AverageMeter()
    top1 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    # Inference only: no_grad() keeps autograd from recording the forward pass
    # (the replacement for the removed `volatile=True` Variable flag).
    with torch.no_grad():
        for i, (img, target1, target2, target3) in enumerate(val_loader):

            # non_blocking=True replaces the old `async=True` keyword, which is
            # a syntax error since Python 3.7.
            img = img.cuda(non_blocking=True)

            # One label per group of n rows: keep the first of each group.
            target1 = target1.view(-1, n)[:, 0].cuda()
            target2 = target2.view(-1, n)[:, 0].cuda()
            target3 = target3.view(-1, n)[:, 0].cuda()

            # compute output and average the n predictions of every sample;
            # the logit width is read from the output instead of hard-coding it.
            output = model(img)
            output = output.view(-1, n, output.size(1)).mean(dim = 1)

            # measure accuracy of the selected head
            if weights[2]> 0:
                prec1 = accuracy(output[:, l2_end:], target3, topk=(1, ))[0]#only need top1
            elif weights[1]>0:
                prec1 = accuracy(output[:, l1_end:l2_end], target2, topk=(1, ))[0]
            else:
                prec1 = accuracy(output[:, :l1_end], target1, topk=(1, ))[0]
            # .item() extracts the Python float from a one-element tensor.
            top1.update(prec1.item(), img.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                       i, len(val_loader), batch_time=batch_time, top1=top1))

    print(' * Prec@1 {top1.avg:.3f}'.format(top1=top1))

    return top1.avg

In [None]:
# Training driver: fine-tunes the loaded model for a few epochs, validates
# after each one and checkpoints the result.
if __name__ == '__main__':
    # Validation Prec@1 a new checkpoint has to beat to be copied to
    # model_best.pth.tar -- presumably the score of the loaded checkpoint; confirm.
    best_prec1 = 63
    criterion = nn.CrossEntropyLoss().cuda()
    # Learning-rate history of earlier sessions, kept for reference:
    #lr = 1e-2
    # reduced once the model was well trained
    #lr = 1e-3
    # reduced again
    lr = 1e-4
    # Only parameters that still require gradients (i.e. not frozen above) are optimized.
    optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr = lr, momentum=0.9, 
                          weight_decay=0)
    # Not used anywhere in this cell.
    resume = None
    # Train for several epochs.
    start_epoch = 0
    epochs = 3
    arch = 'resnet101_levelID'

    for epoch in range(start_epoch, epochs):
        # Step decay: lr is multiplied by 0.1 for every 2 completed epochs.
        adjust_learning_rate(lr=lr, optimizer=optimizer, epoch=epoch, denominator=2)

        # Train for one epoch per loss-weight setting; currently a single
        # setting that weights the three label levels equally.
        for weights in [[1, 1, 1]]:
            loss_log, acc_log = train(train_loader=loader_train, model=model, criterion=criterion,
                                      weights = weights, optimizer=optimizer, epoch=epoch)

            # Evaluate on the validation set. Note this call sits inside the
            # weights loop, so it runs once per weight setting.
            prec1 = validate(val_loader=loader_val, model=model, weights=weights)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': arch,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer' : optimizer.state_dict(),
        }, is_best)

        #plot loss and acc
        #fig = plt.figure(figsize = (6,3), dpi = 600)
        #loss_log = np.array(loss_log)
        #ax1 = plt.subplot(121)
        #ax1.plot(loss_log)
        #ax1.set_ylabel('Loss', weight = 'bold')
        #acc_log = np.array(acc_log)
        #ax2 = plt.subplot(111)
        #ax2.plot(acc_log)
        #ax2.set_ylabel('Train_accuracy', weight = 'bold')
        #np.savetxt(X=np.vstack((loss_log, acc_log)), fname='loss_acc_log.txt', fmt='%.3f')

Epoch: [0][0/94759]	Time 129.946 (129.946)	Data 1.567 (1.567)	Loss 3.1924 (3.1924)	Prec@1 66.406 (66.406)
Epoch: [0][50/94759]	Time 0.849 (3.379)	Data 0.000 (0.031)	Loss 2.7376 (2.9758)	Prec@1 65.625 (64.139)
Epoch: [0][100/94759]	Time 0.860 (2.132)	Data 0.000 (0.016)	Loss 2.5216 (2.9333)	Prec@1 68.750 (64.766)
Epoch: [0][150/94759]	Time 0.850 (1.712)	Data 0.000 (0.011)	Loss 3.9911 (2.9456)	Prec@1 54.688 (64.942)
Epoch: [0][200/94759]	Time 0.852 (1.498)	Data 0.000 (0.008)	Loss 2.8168 (2.9565)	Prec@1 65.625 (64.937)
Epoch: [0][250/94759]	Time 0.853 (1.370)	Data 0.000 (0.006)	Loss 3.2236 (2.9578)	Prec@1 61.719 (65.080)
Epoch: [0][300/94759]	Time 0.849 (1.284)	Data 0.000 (0.005)	Loss 3.5103 (2.9431)	Prec@1 57.031 (65.267)
Epoch: [0][350/94759]	Time 0.852 (1.222)	Data 0.000 (0.005)	Loss 3.3751 (2.9446)	Prec@1 54.688 (65.251)
Epoch: [0][400/94759]	Time 0.852 (1.176)	Data 0.000 (0.004)	Loss 3.4974 (2.9441)	Prec@1 61.719 (65.263)
Epoch: [0][450/94759]	Time 0.853 (1.140)	Data 0.000 (0.004)	Los