In [1]:
import os, sys, math, io
import numpy as np
import pandas as pd
import multiprocessing as mp
import bson
import struct
from PIL import Image
import time
import shutil

%matplotlib inline
import matplotlib.pyplot as plt

from collections import defaultdict
from tqdm import *

In [2]:
import torch
import torch.nn as nn
from torch.nn import init
from torch.autograd import Variable
import torchvision
import torchvision.transforms as T
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler
from torch.utils.data import Dataset

###  Instead of create a new dataframe, just load the `sample_submission.csv` file and rewrite it.

In [3]:
data_dir = './input/'
submission_df = pd.read_csv(data_dir + "sample_submission.csv")
submission_df.head()

Unnamed: 0,_id,category_id
0,10,1000010653
1,14,1000010653
2,21,1000010653
3,24,1000010653
4,27,1000010653


In [4]:
categories_path = os.path.join(data_dir, "category_names.csv")
categories_df = pd.read_csv(categories_path, index_col="category_id")

# Maps the category_id to an integer index.
categories_df["category_idx"] = pd.Series(range(len(categories_df)), index=categories_df.index)
categories_df.head()

Unnamed: 0_level_0,category_level1,category_level2,category_level3,category_idx
category_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1000021794,ABONNEMENT / SERVICES,CARTE PREPAYEE,CARTE PREPAYEE MULTIMEDIA,0
1000012764,AMENAGEMENT URBAIN - VOIRIE,AMENAGEMENT URBAIN,ABRI FUMEUR,1
1000012776,AMENAGEMENT URBAIN - VOIRIE,AMENAGEMENT URBAIN,ABRI VELO - ABRI MOTO,2
1000012768,AMENAGEMENT URBAIN - VOIRIE,AMENAGEMENT URBAIN,FONTAINE A EAU,3
1000012755,AMENAGEMENT URBAIN - VOIRIE,SIGNALETIQUE,PANNEAU D'INFORMATION EXTERIEUR,4


Recover the mapping dictionaries for idx to id.

In [5]:
id_idx = zip(categories_df.index, categories_df['category_idx'])
idx_id = zip(categories_df['category_idx'], categories_df.index)
cat2idx = dict(id_idx)
idx2cat = dict(idx_id)

### Change the model structure to the one used in train.

In [6]:
model = torchvision.models.resnet50(pretrained=False)
model.avgpool = nn.AvgPool2d(kernel_size = 6)
model.fc = nn.Linear(in_features=2048, out_features=49 + 483 + 5270)

In [7]:
trained_model = 'model_best.pth.tar'

def load_model(model, trained_model):
    if os.path.isfile(trained_model):
        print("=> loading checkpoint '{}'".format(trained_model))
        checkpoint = torch.load(trained_model)
        model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}'".format(trained_model))
        return model
    else:
        print("=> no checkpoint found at '{}'".format(best_model))

In [8]:
model = load_model(model, trained_model)

=> loading checkpoint 'model_best.pth.tar'
=> loaded checkpoint 'model_best.pth.tar'


### change the fc layer back to the one without levelID to save time in evaluation.

In [9]:
new_fc = nn.Linear(in_features=2048, out_features=5270)
new_fc.weight.data = model.fc.weight.data[532:]
new_fc.bias.data = model.fc.bias.data[532:]
model.fc = new_fc

In [10]:
model.cuda() #load into gpu

ResNet (
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
  (relu): ReLU (inplace)
  (maxpool): MaxPool2d (size=(3, 3), stride=(2, 2), padding=(1, 1), dilation=(1, 1))
  (layer1): Sequential (
    (0): Bottleneck (
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
      (relu): ReLU (inplace)
      (downsample): Sequential (
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
      )
    )
    (1): Bott

In [11]:
test_bson_path = os.path.join(data_dir, "test.bson")
test_bson_file = open(test_bson_path, "rb")
test_data = bson.decode_file_iter(open(test_bson_path, "rb"))
num_test_products = 1768182

### Average the results from the orginal and cropped/flipped images to reduce the validation errors.

In [12]:
try:
    import accimage
except ImportError:
    accimage = None
    
def _is_pil_image(img):
    if accimage is not None:
        return isinstance(img, (Image.Image, accimage.Image))
    else:
        return isinstance(img, Image.Image)
    
def hflip(img):
    """Horizontally flip the given PIL Image.
    Args:
        img (PIL Image): Image to be flipped.
    Returns:
        PIL Image:  Horizontall flipped image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    return img.transpose(Image.FLIP_LEFT_RIGHT)

In [13]:
mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
# transform_train = T.Compose([T.RandomHorizontalFlip(), 
#                              T.ToTensor(),T.Normalize(mean=mean, std=std)])
transform_0 = T.Compose([T.ToTensor(),T.Normalize(mean=mean, std=std)])
transform_1 = T.Compose([hflip, transform_0])
transform_2 = T.Compose([T.Scale(size=240), T.RandomCrop(size=180), transform_0])
transform_3 = T.Compose([T.Scale(size=240), T.RandomCrop(size=180), transform_1])

### Do flip and random crop according to the images per product has.

In [14]:
def transform(imgs, transform_0, transform_1, transform_2, transform_3):
    num_imgs = len(imgs)
    batch_img = []
    if num_imgs == 1:
        bson_img = imgs[0]["picture"]
        image = io.BytesIO(bson_img)
        img = Image.open(image)
        batch_x = [transform_0(img), transform_1(img)]
#     elif num_imgs == 2:
#         for i in range(num_imgs):
#             bson_img = imgs[0]["picture"]
#             image = io.BytesIO(bson_img)
#             img = Image.open(image)
#             batch_img.append(img)
#         img1, img2 = batch_img[0], batch_img[1]
#         batch_x = [transform_0(img1), transform_0(img2), transform_1(img1), transform_1(img2)]
#     elif num_imgs == 3:
#         for i in range(num_imgs):
#             bson_img = imgs[0]["picture"]
#             image = io.BytesIO(bson_img)
#             img = Image.open(image)
#             batch_img.append(img)
#         img1, img2, img3 = batch_img[0], batch_img[1], batch_img[2]
#         batch_x = [transform_0(img1), transform_0(img2), transform_0(img3), transform_1(img1)]
    else:
        batch_x = []
        for i in range(num_imgs):
            bson_img = imgs[0]["picture"]
            image = io.BytesIO(bson_img)
            img = Image.open(image)
            batch_x.append(transform_0(img))
    return batch_x
        
        
    

In [15]:
model.eval()
with tqdm(total=num_test_products) as pbar:
    for c, d in enumerate(test_data):
        product_id = d["_id"]
        imgs = d["imgs"]

        batch_x = transform(imgs, transform_0=transform_0, transform_1=transform_1,
                    transform_2=transform_2, transform_3=transform_3)

        batch_x = torch.stack(dim=0, sequence=batch_x).cuda()
        img_var = Variable(batch_x, volatile=True)
        prediction = model(img_var).data
        avg_pred = prediction.mean(dim=0)
        _, cat_idx = avg_pred.max(dim=0)
        submission_df.iloc[c]["category_id"] = idx2cat[cat_idx[0]]   
        pbar.update()

submission_df.to_csv("my_submission.csv.gz", compression="gzip", index=False)

100%|██████████| 1768182/1768182 [3:43:00<00:00, 132.15it/s]  


### original evaluate method without any average

In [13]:
model.eval()
with tqdm(total=num_test_products) as pbar:
    for c, d in enumerate(test_data):
        product_id = d["_id"]
        num_imgs = len(d["imgs"])

        batch_x = []
        for i in range(num_imgs):
            bson_img = d["imgs"][i]["picture"]

            # Load and preprocess the image.
            image = io.BytesIO(bson_img)
            img = Image.open(image)
            x = transform_val(img)

            # Add the image to the batch.
            batch_x.append(x)

        batch_x = torch.stack(dim=0, sequence=batch_x).cuda()
        img_var = Variable(batch_x, volatile=True)
        prediction = model(img_var).data
        avg_pred = prediction.mean(dim=0)
        _, cat_idx = avg_pred.max(dim=0)
        submission_df.iloc[c]["category_id"] = idx2cat[cat_idx[0]]   
        pbar.update()

submission_df.to_csv("my_submission.csv.gz", compression="gzip", index=False)

100%|██████████| 1768182/1768182 [3:34:48<00:00, 137.19it/s]  


In [14]:
submission_df.head()

Unnamed: 0,_id,category_id
0,10,1000005605
1,14,1000010653
2,21,1000010653
3,24,1000002334
4,27,1000022508


In [16]:
submission_df.head()

Unnamed: 0,_id,category_id
0,10,1000015802
1,14,1000010653
2,21,1000010653
3,24,1000002373
4,27,1000022508


### it seems that using average is not as good as without any augmentation.