# Intel Scene Classification Challenge

Developer's note:

The following code was executed on an AWS EC2 instance with the following configuration:

Deep Learning AMI (Ubuntu) Version 22.0 (ami-01a4e5be5f289dd12)

Instance type/GPU - p2.xlarge

Pandas version - 0.22.0, Numpy version - 1.16.2, fastai version - 1.0.50.post1

The folder structure relative to the notebook is as follows. Please keep the folder structure as it is for the script to execute successfully.

dataupload - The folder containing sample_submission_CH2mq5Z.csv, test_WyRytb0.csv, train.csv and the train folder. The train folder holds all images, both training and test.

The script automatically creates a test folder in the current directory and copies the test images into it.

In [1]:
import gc
import os
import random
import shutil
import statistics

import pandas as pd, numpy as np
import matplotlib.pyplot as plt, seaborn as sns
from sklearn import metrics
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold
gc.collect()
import fastai
from torchvision.models import *
from fastai.vision.models import *
from fastai.vision.learner import model_meta
from fastai.vision import *
from fastai.metrics import error_rate
%matplotlib inline
print(pd.__version__)
print(np.__version__)
print(fastai.__version__)

0.22.0
1.16.2
1.0.50.post1


In [2]:
# Confirm that PyTorch can see a CUDA GPU before starting any training.
torch.cuda.is_available()

True

In [3]:
# Data root holding the CSV files and the train image folder.
# Keep the trailing slash: some cells may build file paths by plain string concatenation.
path="dataupload/"

In [4]:
# Sanity check: list what is actually present in the data root.
print(os.listdir(path))

['sample_submission_CH2mq5Z.csv', 'test_WyRytb0.csv', 'train', 'train.csv', 'models']


In [5]:
# Training labels: one row per image with its file name and class id.
train_csv = os.path.join(path, 'train.csv')
train_df = pd.read_csv(train_csv)
train_df.head()

Unnamed: 0,image_name,label
0,0.jpg,0
1,1.jpg,4
2,2.jpg,5
3,4.jpg,0
4,7.jpg,4


In [6]:
# Test listing: file names only, no labels.
test_csv = os.path.join(path, 'test_WyRytb0.csv')
test_df = pd.read_csv(test_csv)
test_df.head()

Unnamed: 0,image_name
0,3.jpg
1,5.jpg
2,6.jpg
3,11.jpg
4,14.jpg


Create a separate test folder

In [8]:
# Destination folder (next to the notebook) that will receive the test images.
test_path ="./test"

Copy the test images to the test folder

In [10]:
%%time
#create new path
if not os.path.exists(test_path):
    os.mkdir(test_path)
    for filename in test_df["image_name"]:
        shutil.copy(path+'train/'+filename, test_path)

CPU times: user 472 ms, sys: 500 ms, total: 972 ms
Wall time: 4.68 s


### resnet50: seed=2019

In [11]:
# Seed every RNG in play so the run is as repeatable as possible.
random.seed(2019)                           # Python RNG — NOTE(review): fastai transforms appear to draw from it; confirm
np.random.seed(2019)
torch.manual_seed(2019)
torch.backends.cudnn.deterministic = True   # ask cuDNN for deterministic kernels
torch.backends.cudnn.benchmark = False      # autotuner can pick different kernels per run (fastai may enable it on import)

In [12]:
%%time
data = (ImageList.from_csv(path=path, 
                              csv_name='train.csv',
                              folder='train'
                             )
        .split_by_rand_pct(0.2, seed=2019)
        .label_from_df(cols='label')
        .add_test_folder("../test")
        .transform(get_transforms(), size=128)
        .databunch(bs=32)
        .normalize(imagenet_stats))

CPU times: user 420 ms, sys: 48 ms, total: 468 ms
Wall time: 387 ms


In [13]:
# DataBunch summary: class count, class labels, then train / valid / test sizes.
print(data.c, data.classes, len(data.train_ds), len(data.valid_ds), len(data.test_ds), sep='\n')

6
[0, 1, 2, 3, 4, 5]
13628
3406
7301


In [14]:
%%time
# ResNet-50 learner starting from pretrained weights; accuracy is reported on the validation set.
learn = cnn_learner(data, resnet50, pretrained=True, metrics = accuracy)

CPU times: user 2.18 s, sys: 604 ms, total: 2.79 s
Wall time: 4.28 s


In [15]:
# Stage 1 (128px): 4 one-cycle epochs at the default learning rate, before any unfreeze() call.
learn.fit_one_cycle(4)

epoch,train_loss,valid_loss,accuracy,time
0,0.445498,0.294935,0.890487,01:48
1,0.329339,0.235546,0.917792,01:35
2,0.264546,0.207588,0.920435,01:35
3,0.218971,0.198193,0.927187,01:35


In [16]:
# Checkpoint after stage 1.
learn.save('stg1_2019')

In [17]:
# Stage 2 (128px): unfreeze the whole network and fine-tune for 6 epochs with
# learning rates spread from 1e-5 to 1e-3 across the layer groups.
learn.unfreeze()
learn.fit_one_cycle(6, max_lr=slice(1e-5,1e-3))

epoch,train_loss,valid_loss,accuracy,time
0,0.256572,0.224413,0.920141,02:15
1,0.252872,0.20617,0.925719,02:14
2,0.244659,0.221736,0.927187,02:14
3,0.131762,0.199631,0.932766,02:13
4,0.091802,0.202431,0.938931,02:16
5,0.06336,0.198323,0.939225,02:15


In [18]:
# Checkpoint after stage 2; this is the checkpoint reloaded for the 150px stages.
learn.save('stg2_2019')

In [19]:
# 150px DataBunch for the seed-2019 run. Same pipeline and same split seed as the
# 128px version, so the train/validation membership is unchanged; only the image
# size differs.
data2 = (
    ImageList.from_csv(path=path, csv_name='train.csv', folder='train')
    .split_by_rand_pct(0.2, seed=2019)
    .label_from_df(cols='label')
    .add_test_folder("../test")
    .transform(get_transforms(), size=150)
    .databunch(bs=32)
    .normalize(imagenet_stats)
)

In [20]:
# Progressive resizing: build a new learner on the 150px data and restore the
# stage-2 weights that were trained at 128px.
learn = cnn_learner(data2, resnet50, pretrained=True, metrics = accuracy)
learn.load('stg2_2019')

Learner(data=ImageDataBunch;

Train: LabelList (13628 items)
x: ImageList
Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150)
y: CategoryList
0,4,5,2,5
Path: dataupload;

Valid: LabelList (3406 items)
x: ImageList
Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150)
y: CategoryList
3,1,4,1,5
Path: dataupload;

Test: LabelList (7301 items)
x: ImageList
Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150)
y: EmptyLabelList
,,,,
Path: dataupload, model=Sequential(
  (0): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1,

In [21]:
# Stage 3 (150px): 4 one-cycle epochs on the newly created learner, before unfreeze() is called on it.
learn.fit_one_cycle(4)

epoch,train_loss,valid_loss,accuracy,time
0,0.098979,0.221003,0.938638,02:30
1,0.083319,0.246171,0.938931,02:20
2,0.071986,0.253544,0.938344,02:19
3,0.069523,0.242317,0.942161,02:20


In [22]:
# Checkpoint after stage 3.
learn.save('stg3_2019')

In [23]:
# Stage 4 (150px): unfreeze and fine-tune for 2 epochs with learning rates from 3e-6 to 1e-4.
learn.unfreeze()
learn.fit_one_cycle(2, max_lr=slice(3e-6,1e-4))

epoch,train_loss,valid_loss,accuracy,time
0,0.065011,0.25706,0.935995,03:12
1,0.057232,0.240055,0.940987,03:10


In [24]:
# Checkpoint after stage 4.
learn.save('stg4_2019')

In [27]:
# Stage 5 (150px): 2 more epochs at a narrower range (4e-6 to 4e-5); the model is still unfrozen from stage 4.
learn.fit_one_cycle(2, max_lr=slice(4e-6,4e-5))

epoch,train_loss,valid_loss,accuracy,time
0,0.064899,0.250204,0.940987,03:11
1,0.047066,0.244283,0.941867,03:12


In [28]:
# Checkpoint after stage 5 (the weights used for the test predictions of this run).
learn.save('stg5_2019')

In [30]:
%%time
# Predict on the unlabelled test set with the stage-5 model. preds_2019 holds one row of
# class scores per test image; y_2019 is the placeholder target and is not used later.
preds_2019,y_2019 = learn.get_preds(ds_type=DatasetType.Test)

CPU times: user 20.8 s, sys: 8.7 s, total: 29.5 s
Wall time: 28.3 s


### resnet50: seed=35

In [31]:
# Seed every RNG in play so the run is as repeatable as possible.
random.seed(35)                             # Python RNG — NOTE(review): fastai transforms appear to draw from it; confirm
np.random.seed(35)
torch.manual_seed(35)
torch.backends.cudnn.deterministic = True   # ask cuDNN for deterministic kernels
torch.backends.cudnn.benchmark = False      # autotuner can pick different kernels per run (fastai may enable it on import)

In [32]:
# 128px DataBunch for the seed-35 run: images listed in train.csv, a random 20%
# held out for validation, labels from the 'label' column, the copied test images
# attached as the unlabelled test set, default fastai augmentations, batches of 32,
# ImageNet normalisation.
data = (
    ImageList.from_csv(path=path, csv_name='train.csv', folder='train')
    .split_by_rand_pct(0.2, seed=35)
    .label_from_df(cols='label')
    .add_test_folder("../test")
    .transform(get_transforms(), size=128)
    .databunch(bs=32)
    .normalize(imagenet_stats)
)

In [33]:
# Second ResNet-50 learner (seed 35 split), again starting from pretrained weights.
learn = cnn_learner(data, resnet50, pretrained=True, metrics = accuracy)

In [34]:
%%time
# Stage 1 (128px): 4 one-cycle epochs at the default learning rate, before any unfreeze() call.
learn.fit_one_cycle(4)

epoch,train_loss,valid_loss,accuracy,time
0,0.43434,0.312398,0.880505,01:36
1,0.354291,0.246578,0.903699,01:35
2,0.279774,0.193455,0.933353,01:35
3,0.254718,0.192246,0.932472,01:36


CPU times: user 4min 42s, sys: 1min 48s, total: 6min 30s
Wall time: 6min 23s


In [35]:
# Checkpoint after stage 1.
learn.save('stg1_res50_35')

In [36]:
# Stage 2 (128px): unfreeze and fine-tune for 8 epochs with learning rates from 1e-5 to 1e-4.
learn.unfreeze()
learn.fit_one_cycle(8, max_lr=slice(1e-5,1e-4))

epoch,train_loss,valid_loss,accuracy,time
0,0.23947,0.179115,0.937463,02:14
1,0.226802,0.17871,0.932766,02:15
2,0.188574,0.175818,0.939812,02:15
3,0.138231,0.164505,0.940987,02:16
4,0.103773,0.167081,0.943042,02:16
5,0.083403,0.164722,0.94451,02:15
6,0.067833,0.167231,0.944803,02:14
7,0.062659,0.167142,0.946565,02:15


In [37]:
# Checkpoint after stage 2; this is the checkpoint reloaded for the 150px stages.
learn.save('stg2_res50_35')

In [38]:
# 150px DataBunch for the seed-35 run. Same pipeline and same split seed as the
# 128px version; only the image size differs.
data2 = (
    ImageList.from_csv(path=path, csv_name='train.csv', folder='train')
    .split_by_rand_pct(0.2, seed=35)
    .label_from_df(cols='label')
    .add_test_folder("../test")
    .transform(get_transforms(), size=150)
    .databunch(bs=32)
    .normalize(imagenet_stats)
)

In [39]:
# Progressive resizing: new learner on the 150px data, restored from the stage-2
# weights that were trained at 128px.
learn = cnn_learner(data2, resnet50, pretrained=True, metrics = accuracy)
learn.load('stg2_res50_35')

Learner(data=ImageDataBunch;

Train: LabelList (13628 items)
x: ImageList
Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150)
y: CategoryList
4,0,4,1,5
Path: dataupload;

Valid: LabelList (3406 items)
x: ImageList
Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150)
y: CategoryList
0,0,2,0,0
Path: dataupload;

Test: LabelList (7301 items)
x: ImageList
Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150)
y: EmptyLabelList
,,,,
Path: dataupload, model=Sequential(
  (0): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1,

In [40]:
# Stage 3 (150px): 3 one-cycle epochs on the newly created learner, before unfreeze() is called on it.
learn.fit_one_cycle(3)

epoch,train_loss,valid_loss,accuracy,time
0,0.114445,0.205432,0.940987,02:20
1,0.081826,0.218503,0.940399,02:20
2,0.067583,0.221942,0.94128,02:20


In [41]:
# Checkpoint after stage 3.
learn.save('stg3_res50_35')

In [42]:
# Recreate the learner and reload the stage-3 checkpoint that was just saved,
# giving a clean starting point for the unfrozen fine-tune.
learn = cnn_learner(data2, resnet50, pretrained=True, metrics = accuracy)
learn.load('stg3_res50_35')

Learner(data=ImageDataBunch;

Train: LabelList (13628 items)
x: ImageList
Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150)
y: CategoryList
4,0,4,1,5
Path: dataupload;

Valid: LabelList (3406 items)
x: ImageList
Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150)
y: CategoryList
0,0,2,0,0
Path: dataupload;

Test: LabelList (7301 items)
x: ImageList
Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150)
y: EmptyLabelList
,,,,
Path: dataupload, model=Sequential(
  (0): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1,

In [43]:
# Stage 4 (150px): unfreeze and fine-tune for 4 epochs with learning rates from 1e-6 to 1e-5.
learn.unfreeze()
learn.fit_one_cycle(4, max_lr=slice(1e-6,1e-5))

epoch,train_loss,valid_loss,accuracy,time
0,0.076019,0.216861,0.94128,03:10
1,0.070791,0.216724,0.942455,03:11
2,0.053392,0.216986,0.940106,03:11
3,0.048873,0.216146,0.940693,03:11


In [44]:
# Checkpoint after stage 4 (the weights used for the test predictions of this run).
learn.save('stg4_res50_35')

In [45]:
%%time
# Predict on the unlabelled test set with the stage-4 model. preds_35 holds one row of
# class scores per test image; y_35 is the placeholder target and is not used later.
preds_35,y_35 = learn.get_preds(ds_type=DatasetType.Test)

CPU times: user 20.9 s, sys: 8.72 s, total: 29.6 s
Wall time: 28.5 s


### densenet121: seed = 2000

In [46]:
# Seed every RNG in play so the run is as repeatable as possible.
random.seed(2000)                           # Python RNG — NOTE(review): fastai transforms appear to draw from it; confirm
np.random.seed(2000)
torch.manual_seed(2000)
torch.backends.cudnn.deterministic = True   # ask cuDNN for deterministic kernels
torch.backends.cudnn.benchmark = False      # autotuner can pick different kernels per run (fastai may enable it on import)

In [47]:
# 128px DataBunch for the seed-2000 run: images listed in train.csv, a random 20%
# held out for validation, labels from the 'label' column, the copied test images
# attached as the unlabelled test set, default fastai augmentations, batches of 32,
# ImageNet normalisation.
data = (
    ImageList.from_csv(path=path, csv_name='train.csv', folder='train')
    .split_by_rand_pct(0.2, seed=2000)
    .label_from_df(cols='label')
    .add_test_folder("../test")
    .transform(get_transforms(), size=128)
    .databunch(bs=32)
    .normalize(imagenet_stats)
)

In [48]:
# DataBunch summary: class count, class labels, then train / valid / test sizes.
print(data.c, data.classes, len(data.train_ds), len(data.valid_ds), len(data.test_ds), sep='\n')

6
[0, 1, 2, 3, 4, 5]
13628
3406
7301


In [49]:
# DenseNet-121 learner starting from pretrained weights; accuracy is reported on the validation set.
learn = cnn_learner(data, densenet121, pretrained=True, metrics = accuracy)

  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)


In [50]:
# Stage 1 (128px): 4 one-cycle epochs at the default learning rate, before any unfreeze() call.
learn.fit_one_cycle(4)

epoch,train_loss,valid_loss,accuracy,time
0,0.389126,0.257825,0.909571,01:39
1,0.310119,0.238732,0.916031,01:34
2,0.223588,0.200286,0.931298,01:34
3,0.206075,0.198208,0.927481,01:34


In [51]:
# Checkpoint after stage 1.
learn.save('stg1_d21_2000')

In [52]:
# Stage 2 (128px): unfreeze and fine-tune for 4 epochs with learning rates from 3e-6 to 3e-5.
learn.unfreeze()
learn.fit_one_cycle(4, max_lr=slice(3e-6,3e-5))

epoch,train_loss,valid_loss,accuracy,time
0,0.180344,0.190865,0.93394,02:25
1,0.207324,0.188152,0.935115,02:23
2,0.179336,0.184392,0.935408,02:25
3,0.165157,0.184327,0.934234,02:24


In [53]:
# Checkpoint after stage 2; this is the checkpoint reloaded for the 150px stages.
learn.save('stg2_d21_2000')

In [54]:
# 150px DataBunch for the seed-2000 run. Same pipeline and same split seed as the
# 128px version; only the image size differs.
data2 = (
    ImageList.from_csv(path=path, csv_name='train.csv', folder='train')
    .split_by_rand_pct(0.2, seed=2000)
    .label_from_df(cols='label')
    .add_test_folder("../test")
    .transform(get_transforms(), size=150)
    .databunch(bs=32)
    .normalize(imagenet_stats)
)

In [55]:
# Progressive resizing: new DenseNet-121 learner on the 150px data, restored from
# the stage-2 weights that were trained at 128px.
learn = cnn_learner(data2, densenet121, pretrained=True, metrics = accuracy)
learn.load('stg2_d21_2000')

  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)


Learner(data=ImageDataBunch;

Train: LabelList (13628 items)
x: ImageList
Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150)
y: CategoryList
0,4,5,0,2
Path: dataupload;

Valid: LabelList (3406 items)
x: ImageList
Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150)
y: CategoryList
2,3,5,5,5
Path: dataupload;

Test: LabelList (7301 items)
x: ImageList
Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150)
y: EmptyLabelList
,,,,
Path: dataupload, model=Sequential(
  (0): Sequential(
    (0): Sequential(
      (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu0): ReLU(inplace)
      (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (denseblock1): _DenseBlock(
        (den

In [56]:
# Stage 3 (150px): 4 one-cycle epochs on the newly created learner, before unfreeze() is called on it.
learn.fit_one_cycle(4)

epoch,train_loss,valid_loss,accuracy,time
0,0.215418,0.204306,0.925719,02:27
1,0.218711,0.198139,0.934527,02:23
2,0.17831,0.190214,0.931298,02:22
3,0.162589,0.187451,0.936583,02:22


In [57]:
# Checkpoint after stage 3.
learn.save('stg3_d21_2000')

In [58]:
# Stage 4 (150px): unfreeze and fine-tune for 4 epochs with learning rates from 8e-6 to 8e-5.
learn.unfreeze()
learn.fit_one_cycle(4, max_lr=slice(8e-6,8e-5))

epoch,train_loss,valid_loss,accuracy,time
0,0.152069,0.187839,0.934527,03:16
1,0.154466,0.191838,0.934821,03:15
2,0.128446,0.183221,0.933647,03:13
3,0.104957,0.185003,0.935995,03:13


In [59]:
# Checkpoint after stage 4 (the weights used for the test predictions of this run).
learn.save('stg4_d21_2000')

In [60]:
%%time
# Predict on the unlabelled test set with the stage-4 model. preds_2000 holds one row of
# class scores per test image; y_2000 is the placeholder target and is not used later.
preds_2000,y_2000 = learn.get_preds(ds_type=DatasetType.Test)

CPU times: user 21.5 s, sys: 9.66 s, total: 31.2 s
Wall time: 30.1 s


### densenet121: seed = 8888

In [61]:
# Seed every RNG in play so the run is as repeatable as possible.
random.seed(8888)                           # Python RNG — NOTE(review): fastai transforms appear to draw from it; confirm
np.random.seed(8888)
torch.manual_seed(8888)
torch.backends.cudnn.deterministic = True   # ask cuDNN for deterministic kernels
torch.backends.cudnn.benchmark = False      # autotuner can pick different kernels per run (fastai may enable it on import)

In [62]:
# 128px DataBunch for the seed-8888 run: images listed in train.csv, a random 20%
# held out for validation, labels from the 'label' column, the copied test images
# attached as the unlabelled test set, default fastai augmentations, batches of 32,
# ImageNet normalisation.
data = (
    ImageList.from_csv(path=path, csv_name='train.csv', folder='train')
    .split_by_rand_pct(0.2, seed=8888)
    .label_from_df(cols='label')
    .add_test_folder("../test")
    .transform(get_transforms(), size=128)
    .databunch(bs=32)
    .normalize(imagenet_stats)
)

In [63]:
# Second DenseNet-121 learner (seed 8888 split), again starting from pretrained weights.
learn = cnn_learner(data, densenet121, pretrained=True, metrics = accuracy)

  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)


In [64]:
# Stage 1 (128px): 4 one-cycle epochs at the default learning rate, before any unfreeze() call.
learn.fit_one_cycle(4)

epoch,train_loss,valid_loss,accuracy,time
0,0.397829,0.258228,0.912507,01:34
1,0.294086,0.222715,0.927481,01:35
2,0.244964,0.188504,0.93717,01:35
3,0.226304,0.185743,0.936289,01:35


In [65]:
# Checkpoint after stage 1.
learn.save('stg1_d21_8888')

In [66]:
# Stage 2 (128px): unfreeze and fine-tune for 4 epochs with learning rates from 3e-5 to 8e-4.
learn.unfreeze()
learn.fit_one_cycle(4, max_lr=slice(3e-5,8e-4))

epoch,train_loss,valid_loss,accuracy,time
0,0.230589,0.1986,0.930417,02:23
1,0.210432,0.18076,0.935995,02:25
2,0.137779,0.165079,0.943629,02:24
3,0.09779,0.161305,0.944216,02:25


In [67]:
# Checkpoint after stage 2; this is the checkpoint reloaded for the 150px stages.
learn.save('stg2_d21_8888')

In [68]:
# 150px DataBunch for the seed-8888 run. Same pipeline and same split seed as the
# 128px version; only the image size differs.
data2 = (
    ImageList.from_csv(path=path, csv_name='train.csv', folder='train')
    .split_by_rand_pct(0.2, seed=8888)
    .label_from_df(cols='label')
    .add_test_folder("../test")
    .transform(get_transforms(), size=150)
    .databunch(bs=32)
    .normalize(imagenet_stats)
)

In [69]:
# Progressive resizing: new DenseNet-121 learner on the 150px data, restored from
# the stage-2 weights that were trained at 128px.
learn = cnn_learner(data2, densenet121, pretrained=True, metrics = accuracy)
learn.load('stg2_d21_8888')

  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)
  nn.init.kaiming_normal(m.weight.data)


Learner(data=ImageDataBunch;

Train: LabelList (13628 items)
x: ImageList
Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150)
y: CategoryList
0,4,5,0,4
Path: dataupload;

Valid: LabelList (3406 items)
x: ImageList
Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150)
y: CategoryList
1,0,3,4,5
Path: dataupload;

Test: LabelList (7301 items)
x: ImageList
Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150)
y: EmptyLabelList
,,,,
Path: dataupload, model=Sequential(
  (0): Sequential(
    (0): Sequential(
      (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu0): ReLU(inplace)
      (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (denseblock1): _DenseBlock(
        (den

In [70]:
# Stage 3 (150px): 4 one-cycle epochs on the newly created learner, before unfreeze() is called on it.
learn.fit_one_cycle(4)

epoch,train_loss,valid_loss,accuracy,time
0,0.140241,0.207139,0.935995,02:21
1,0.15379,0.195816,0.936583,02:24
2,0.110974,0.199383,0.938051,02:24
3,0.096953,0.203919,0.940987,02:23


In [71]:
# Checkpoint after stage 3.
learn.save('stg3_d21_8888')

In [72]:
# Stage 4 (150px): unfreeze and fine-tune for 4 epochs with learning rates from 1e-6 to 5e-6.
learn.unfreeze()
learn.fit_one_cycle(4, max_lr=slice(1e-6,5e-6))

epoch,train_loss,valid_loss,accuracy,time
0,0.079827,0.196797,0.940106,03:13
1,0.086263,0.197754,0.940987,03:13
2,0.085381,0.19638,0.942455,03:13
3,0.086286,0.199759,0.940693,03:13


In [73]:
# Checkpoint after stage 4 (the weights used for the test predictions of this run).
learn.save('stg4_d21_8888')

In [74]:
%%time
# Predict on the unlabelled test set with the stage-4 model. preds_8888 holds one row of
# class scores per test image; y_8888 is the placeholder target and is not used later.
preds_8888,y_8888 = learn.get_preds(ds_type=DatasetType.Test)

CPU times: user 21.9 s, sys: 7.4 s, total: 29.3 s
Wall time: 28 s


### resnet101: seed = 123456

In [115]:
# Seed every RNG in play so the run is as repeatable as possible.
random.seed(123456)                         # Python RNG — NOTE(review): fastai transforms appear to draw from it; confirm
np.random.seed(123456)
torch.manual_seed(123456)
torch.backends.cudnn.deterministic = True   # ask cuDNN for deterministic kernels
torch.backends.cudnn.benchmark = False      # autotuner can pick different kernels per run (fastai may enable it on import)

In [116]:
# 128px DataBunch for the seed-123456 run: images listed in train.csv, a random 20%
# held out for validation, labels from the 'label' column, the copied test images
# attached as the unlabelled test set, default fastai augmentations, batches of 32,
# ImageNet normalisation.
data = (
    ImageList.from_csv(path=path, csv_name='train.csv', folder='train')
    .split_by_rand_pct(0.2, seed=123456)
    .label_from_df(cols='label')
    .add_test_folder("../test")
    .transform(get_transforms(), size=128)
    .databunch(bs=32)
    .normalize(imagenet_stats)
)

In [117]:
# DataBunch summary: class count, class labels, then train / valid / test sizes.
print(data.c, data.classes, len(data.train_ds), len(data.valid_ds), len(data.test_ds), sep='\n')

6
[0, 1, 2, 3, 4, 5]
13628
3406
7301


In [118]:
# ResNet-101 learner starting from pretrained weights; accuracy is reported on the validation set.
learn = cnn_learner(data, resnet101, pretrained=True, metrics = accuracy)

In [119]:
# Stage 1 (128px): 2 one-cycle epochs at the default learning rate, before any unfreeze() call.
learn.fit_one_cycle(2)

epoch,train_loss,valid_loss,accuracy,time
0,0.366897,0.245441,0.910452,02:32
1,0.281387,0.208579,0.921315,02:30


In [120]:
# Checkpoint after stage 1.
learn.save('stg1_res101_123456')

In [122]:
# Stage 2 (128px): unfreeze and fine-tune for 2 epochs with learning rates from 1e-5 to 8e-5.
learn.unfreeze()
learn.fit_one_cycle(2, max_lr=slice(1e-5,8e-5))

epoch,train_loss,valid_loss,accuracy,time
0,0.248515,0.172174,0.938638,03:39
1,0.188969,0.15966,0.945097,03:41


In [123]:
# Checkpoint after stage 2.
learn.save('stg2_res101_123456')

In [125]:
# Stage 3 (128px): 2 more epochs at a lower range (2e-6 to 2e-5); the model is still unfrozen from stage 2.
learn.fit_one_cycle(2, max_lr=slice(2e-6,2e-5))

epoch,train_loss,valid_loss,accuracy,time
0,0.162255,0.159484,0.940987,03:41
1,0.131885,0.159431,0.943335,03:41


In [126]:
# Checkpoint after stage 3; this is the checkpoint reloaded for the 150px stages.
learn.save('stg3_res101_123456')

In [128]:
# 150px DataBunch for the seed-123456 run. Same pipeline and same split seed as the
# 128px version; only the image size differs.
data2 = (
    ImageList.from_csv(path=path, csv_name='train.csv', folder='train')
    .split_by_rand_pct(0.2, seed=123456)
    .label_from_df(cols='label')
    .add_test_folder("../test")
    .transform(get_transforms(), size=150)
    .databunch(bs=32)
    .normalize(imagenet_stats)
)

In [130]:
# Progressive resizing: new ResNet-101 learner on the 150px data, restored from
# the stage-3 weights that were trained at 128px.
learn = cnn_learner(data2, resnet101, pretrained=True, metrics = accuracy)
learn.load('stg3_res101_123456')

Learner(data=ImageDataBunch;

Train: LabelList (13628 items)
x: ImageList
Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150)
y: CategoryList
0,4,0,4,1
Path: dataupload;

Valid: LabelList (3406 items)
x: ImageList
Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150)
y: CategoryList
0,3,5,3,1
Path: dataupload;

Test: LabelList (7301 items)
x: ImageList
Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150)
y: EmptyLabelList
,,,,
Path: dataupload, model=Sequential(
  (0): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1,

In [131]:
# Stage 4 (150px): 2 one-cycle epochs on the newly created learner, before unfreeze() is called on it.
learn.fit_one_cycle(2)

epoch,train_loss,valid_loss,accuracy,time
0,0.198379,0.186283,0.94128,03:50
1,0.153208,0.169929,0.944803,03:51


In [132]:
# Checkpoint after stage 4.
learn.save('stg4_res101_123456')

In [134]:
# Stage 5 (150px): unfreeze and fine-tune for 2 epochs with learning rates from 2e-5 to 1e-4.
learn.unfreeze()
learn.fit_one_cycle(2, max_lr=slice(2e-5,1e-4))

epoch,train_loss,valid_loss,accuracy,time
0,0.216758,0.202937,0.931591,05:26
1,0.130737,0.165201,0.944216,05:24


In [135]:
# Checkpoint after stage 5.
learn.save('stg5_res101_123456')

In [137]:
# Two further unfrozen epochs at the same learning rates as stage 5.
# NOTE(review): this result is never saved, and the following cell reloads the stage-3
# checkpoint, so these epochs do not reach the final predictions.
learn.fit_one_cycle(2, max_lr=slice(2e-5,1e-4))

epoch,train_loss,valid_loss,accuracy,time
0,0.151776,0.193136,0.935408,05:26
1,0.098151,0.165426,0.942455,05:25


In [138]:
# Recreate the learner and restore the stage-3 checkpoint (trained at 128px only).
# NOTE(review): the stage-4/5 checkpoints and the later unsaved epochs are discarded here,
# so the resnet101 test predictions come from stage-3 weights evaluated on 150px inputs.
# Presumably deliberate (stage 3 has the lowest recorded validation loss) — confirm.
learn = cnn_learner(data2, resnet101, pretrained=True, metrics = accuracy)
learn.load('stg3_res101_123456')

Learner(data=ImageDataBunch;

Train: LabelList (13628 items)
x: ImageList
Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150)
y: CategoryList
0,4,0,4,1
Path: dataupload;

Valid: LabelList (3406 items)
x: ImageList
Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150)
y: CategoryList
0,3,5,3,1
Path: dataupload;

Test: LabelList (7301 items)
x: ImageList
Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150),Image (3, 150, 150)
y: EmptyLabelList
,,,,
Path: dataupload, model=Sequential(
  (0): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1,

In [139]:
%%time
# Predict on the unlabelled test set with the currently loaded resnet101 weights.
# preds_123456 holds one row of class scores per test image; y_123456 is the
# placeholder target and is not used later.
preds_123456,y_123456 = learn.get_preds(ds_type=DatasetType.Test)

CPU times: user 34.2 s, sys: 15.4 s, total: 49.5 s
Wall time: 48.4 s


### Average all predictions

In [140]:
# Ensemble by simple averaging: every model's test predictions get equal weight.
member_preds = (preds_2019, preds_35, preds_2000, preds_8888, preds_123456)
final_preds = sum(member_preds) / len(member_preds)

In [141]:
# Pick the highest-scoring class for each test image. Use the tensor's own argmax and
# convert to a NumPy integer array, rather than relying on np.argmax dispatching onto
# a torch tensor (which depends on the NumPy/PyTorch versions installed).
final_preds_class = final_preds.argmax(dim=1).cpu().numpy()

In [142]:
# Take the file names from the test dataset that produced the predictions, so that row i
# of the submission describes prediction i. os.listdir() returns names in arbitrary order
# and is not guaranteed to match the order in which fastai enumerated the test folder.
# NOTE(review): assumes every model's test set was enumerated in this same order (all
# were built from the same folder with the same call) — confirm.
test_image_names = [os.path.basename(str(item)) for item in learn.data.test_ds.x.items]
assert len(test_image_names) == len(final_preds_class), "prediction/file-name count mismatch"
submission = pd.DataFrame({ 'image_name': test_image_names, 'label': final_preds_class })
submission.to_csv('submission_2.csv', index=False)

In [143]:
# Preview the first rows of the submission.
submission.head(10)

Unnamed: 0,image_name,label
0,22055.jpg,1
1,18008.jpg,5
2,22333.jpg,5
3,14583.jpg,3
4,20470.jpg,0
5,10073.jpg,5
6,13016.jpg,2
7,17582.jpg,4
8,682.jpg,2
9,18352.jpg,0


In [144]:
# Spot-check the predicted label for a single test image.
submission.loc[submission["image_name"] == "70.jpg"]

Unnamed: 0,image_name,label
180,70.jpg,2


In [145]:
# Spot-check the predicted label for another test image.
submission.loc[submission["image_name"] == "77.jpg"]

Unnamed: 0,image_name,label
6626,77.jpg,0
