Submission code for Analytics Vidhya Game of Deep Learning

The following configuration is needed to run this code:

pandas - 0.23.4

numpy - 1.16.3

Fastai - 1.0.51

The output of the cell below (code cell 1) shows the directory structure, relative to the notebook, and the corresponding data files. Please place the files accordingly.

There is no need to create a test folder. The script automatically creates one and copies the test images into it. The test folder is ./test relative to the notebook, which is ../../working/test relative to the train folder in the directory layout shown below.

In [1]:
import numpy as np
import pandas as pd

import os
# Show the expected input layout: the top-level input folder, then the train folder.
for folder in ("../input", "../input/train"):
    print(os.listdir(folder))


['train', 'test_ApKoW4T.csv', 'sample_submission_ns2btKE.csv']
['images', 'train.csv']


In [2]:
import gc
import os
import random
import shutil
import statistics
gc.collect()

import matplotlib.pyplot as plt, seaborn as sns
from sklearn import metrics
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold

import fastai
from torchvision.models import *
from fastai.vision.models import *
from fastai.vision.learner import model_meta
from fastai.vision import *
from fastai.metrics import error_rate
%matplotlib inline
print(pd.__version__)
print(np.__version__)
print(fastai.__version__)

0.23.4
1.16.3
1.0.51


In [3]:
# Check whether PyTorch can see a CUDA device.
torch.cuda.is_available()

True

In [4]:
# Load the training table (image file name and category) and preview it.
train_df=pd.read_csv('../input/train/train.csv')
train_df.head()

Unnamed: 0,image,category
0,2823080.jpg,1
1,2870024.jpg,1
2,2662125.jpg,2
3,2900420.jpg,3
4,2804883.jpg,2


In [5]:
# Load the list of test image file names and preview it.
test_df=pd.read_csv('../input/test_ApKoW4T.csv')
test_df.head()

Unnamed: 0,image
0,1007700.jpg
1,1011369.jpg
2,1051155.jpg
3,1062001.jpg
4,1069397.jpg


In [6]:
# Folder, relative to the notebook's working directory, that receives the test images.
test_path ="./test"

**Copy the test images to the test folder**

In [7]:
%%time
#create new path
if not os.path.exists(test_path):
    os.mkdir(test_path)
    for filename in test_df["image"]:
        shutil.copy('../input/train/images/'+filename, test_path)

CPU times: user 300 ms, sys: 384 ms, total: 684 ms
Wall time: 3.28 s


**Seed = 2019 - Resnet 152**

In [8]:
# Seed every random number generator in play for this run. Besides NumPy and
# the PyTorch CPU generator, Python's `random` and the CUDA generators are
# seeded too, and cuDNN autotuning is switched off, since it can select
# different kernels from run to run.
seed = 2019
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [9]:
# Root folder passed to the data block API; train.csv and images/ are read from here.
path='../input/train/'

In [10]:
%%time
data = (ImageList.from_csv(path=path, 
                              csv_name='train.csv',
                              folder='images'
                             )
        .split_by_rand_pct(0.2, seed=2019)
        .label_from_df(cols='category')
        .add_test_folder("../../working/test")
        .transform(get_transforms(), size=128)
        .databunch(bs=32)
        .normalize(imagenet_stats))

CPU times: user 240 ms, sys: 20 ms, total: 260 ms
Wall time: 569 ms


In [11]:
# Sanity-check the DataBunch: class count, class labels, then the sizes of the
# train, validation and test sets.
for value in (data.c, data.classes,
              len(data.train_ds), len(data.valid_ds), len(data.test_ds)):
    print(value)

5
[1, 2, 3, 4, 5]
5002
1250
2680


In [12]:
%%time
# Create a learner around a pretrained ResNet-152 that reports accuracy.
# model_dir is where learn.save / learn.load keep the weight files
# (presumably resolved relative to `path` - confirm).
learn = cnn_learner(data, resnet152, pretrained=True, metrics = accuracy,model_dir='../../working/')

Downloading: "https://download.pytorch.org/models/resnet152-b121ed2d.pth" to /tmp/.torch/models/resnet152-b121ed2d.pth
241530880it [00:02, 93569290.52it/s]


CPU times: user 5.08 s, sys: 2.26 s, total: 7.34 s
Wall time: 11.4 s


In [13]:
# Stage 1: train for 4 epochs with the one-cycle schedule, before unfreezing.
learn.fit_one_cycle(4)

epoch,train_loss,valid_loss,accuracy,time
0,0.848348,0.559283,0.7952,00:39
1,0.596178,0.393769,0.8464,00:33
2,0.473959,0.352941,0.8712,00:33
3,0.374935,0.336424,0.8776,00:33


In [14]:
# Checkpoint the stage-1 weights.
learn.save('res152_stg1_2019')

In [16]:
# Stage 2: unfreeze the model and keep training with learning rates in the
# range 1e-5 to 1e-4.
# NOTE(review): 12 epochs are requested here, but the recorded output lists
# only 10 - confirm which setting produced the submitted result.
learn.unfreeze()
learn.fit_one_cycle(12, max_lr=slice(1e-5,1e-4))

epoch,train_loss,valid_loss,accuracy,time
0,0.347566,0.311398,0.8872,00:58
1,0.334281,0.260559,0.908,00:57
2,0.279473,0.242438,0.916,00:57
3,0.211397,0.206041,0.9288,00:57
4,0.172023,0.180767,0.9328,00:57
5,0.125416,0.168226,0.9344,00:57
6,0.105376,0.17508,0.936,00:57
7,0.07791,0.162581,0.9384,00:57
8,0.078075,0.170699,0.9368,00:57
9,0.058118,0.166175,0.9392,00:57


In [17]:
# Checkpoint the stage-2 weights; they are reloaded for the larger image size.
learn.save('res152_stg2_2019')

In [23]:
%%time
data2 = (ImageList.from_csv(path=path, 
                              csv_name='train.csv',
                              folder='images'
                             )
        .split_by_rand_pct(0.2, seed=2019)
        .label_from_df(cols='category')
        .add_test_folder("../../working/test")
        .transform(get_transforms(), size=210)
        .databunch(bs=32)
        .normalize(imagenet_stats))

CPU times: user 324 ms, sys: 28 ms, total: 352 ms
Wall time: 359 ms


In [24]:
%%time
# Create a new ResNet-152 learner on the 210px data and restore the stage-2
# weights trained at 128px.
learn = cnn_learner(data2, resnet152, pretrained=True, metrics = accuracy,model_dir='../../working/')
learn.load('res152_stg2_2019')

CPU times: user 2.77 s, sys: 1.24 s, total: 4 s
Wall time: 4.01 s


In [25]:
# Stage 3: train for 4 epochs at the 210px image size, before unfreezing.
learn.fit_one_cycle(4)

epoch,train_loss,valid_loss,accuracy,time
0,0.145641,0.188707,0.9344,01:02
1,0.110027,0.145435,0.9584,00:59
2,0.076989,0.163445,0.9528,00:59
3,0.068287,0.159427,0.956,00:59


In [26]:
# Checkpoint the stage-3 weights.
learn.save('res152_stg3_2019')

In [29]:
# Stage 4: unfreeze the model and fine-tune at 210px with learning rates in
# the range 2e-5 to 2e-4.
# NOTE(review): 12 epochs are requested here, but the recorded output lists
# only 10 - confirm which setting produced the submitted result.
learn.unfreeze()
learn.fit_one_cycle(12, max_lr=slice(2e-5,2e-4))

epoch,train_loss,valid_loss,accuracy,time
0,0.057849,0.147229,0.9552,01:38
1,0.083076,0.209359,0.9416,01:37
2,0.137095,0.186365,0.9368,01:37
3,0.144389,0.191259,0.9448,01:37
4,0.113829,0.144142,0.9536,01:37
5,0.101321,0.130766,0.952,01:37
6,0.07246,0.143426,0.9528,01:37
7,0.056732,0.129925,0.96,01:37
8,0.031837,0.11045,0.968,01:37
9,0.019767,0.105947,0.9704,01:37


In [35]:
%%time
# Predict on the test set with the seed-2019 ResNet-152 model. Only
# preds_2019 is used by the later cells shown in this notebook.
preds_2019,y_2019 = learn.get_preds(ds_type=DatasetType.Test)

CPU times: user 4.32 s, sys: 3.3 s, total: 7.62 s
Wall time: 14.2 s


**seed = 12345 -  Resnet 152**

In [40]:
# Seed every random number generator in play for this run. Besides NumPy and
# the PyTorch CPU generator, Python's `random` and the CUDA generators are
# seeded too, and cuDNN autotuning is switched off, since it can select
# different kernels from run to run.
seed = 12345
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [41]:
%%time
data = (ImageList.from_csv(path=path, 
                              csv_name='train.csv',
                              folder='images'
                             )
        .split_by_rand_pct(0.2, seed=12345)
        .label_from_df(cols='category')
        .add_test_folder("../../working/test")
        .transform(get_transforms(), size=128)
        .databunch(bs=32)
        .normalize(imagenet_stats))

CPU times: user 216 ms, sys: 8 ms, total: 224 ms
Wall time: 231 ms


In [42]:
# Sanity-check the DataBunch: class count, class labels, then the sizes of the
# train, validation and test sets.
for value in (data.c, data.classes,
              len(data.train_ds), len(data.valid_ds), len(data.test_ds)):
    print(value)

5
[1, 2, 3, 4, 5]
5002
1250
2680


In [43]:
%%time
# Create a new pretrained ResNet-152 learner for the seed-12345 run.
learn = cnn_learner(data, resnet152, pretrained=True, metrics = accuracy,model_dir='../../working/')

CPU times: user 1.68 s, sys: 524 ms, total: 2.21 s
Wall time: 2.21 s


In [44]:
# Stage 1: train for 4 epochs with the one-cycle schedule, before unfreezing.
learn.fit_one_cycle(4)

epoch,train_loss,valid_loss,accuracy,time
0,0.882856,0.595495,0.7664,00:33
1,0.568951,0.439767,0.8424,00:32
2,0.445385,0.357474,0.8632,00:32
3,0.386599,0.350271,0.8664,00:32


In [45]:
# Checkpoint the stage-1 weights.
learn.save('res152_stg1_12345')

In [47]:
# Stage 2: unfreeze the model and train for 10 epochs with learning rates in
# the range 1e-5 to 1e-4.
learn.unfreeze()
learn.fit_one_cycle(10, max_lr=slice(1e-5,1e-4))

epoch,train_loss,valid_loss,accuracy,time
0,0.372114,0.325295,0.876,00:57
1,0.308449,0.270009,0.9016,00:57
2,0.244545,0.241237,0.9088,00:58
3,0.201255,0.250829,0.9136,00:57
4,0.153734,0.195929,0.9352,00:57
5,0.12786,0.208747,0.9304,00:57
6,0.109722,0.19864,0.936,00:57
7,0.072503,0.227494,0.9312,00:57
8,0.062376,0.192808,0.9344,00:57
9,0.061913,0.191418,0.9384,00:57


In [48]:
# Checkpoint the stage-2 weights; they are reloaded for the larger image size.
learn.save('res152_stg2_12345')

In [54]:
%%time
data2 = (ImageList.from_csv(path=path, 
                              csv_name='train.csv',
                              folder='images'
                             )
        .split_by_rand_pct(0.2, seed=12345)
        .label_from_df(cols='category')
        .add_test_folder("../../working/test")
        .transform(get_transforms(), size=210)
        .databunch(bs=32)
        .normalize(imagenet_stats))

CPU times: user 368 ms, sys: 20 ms, total: 388 ms
Wall time: 394 ms


In [55]:
%%time
# Create a new ResNet-152 learner on the 210px data and restore the stage-2
# weights trained at 128px.
learn = cnn_learner(data2, resnet152, pretrained=True, metrics = accuracy,model_dir='../../working/')
learn.load('res152_stg2_12345')

CPU times: user 2.76 s, sys: 1.05 s, total: 3.81 s
Wall time: 3.82 s


In [56]:
# Stage 3: train for 4 epochs at the 210px image size, before unfreezing.
learn.fit_one_cycle(4)

epoch,train_loss,valid_loss,accuracy,time
0,0.127465,0.15991,0.948,00:58
1,0.127787,0.168149,0.9488,00:58
2,0.091874,0.143205,0.9536,00:58
3,0.066063,0.14306,0.9528,00:58


In [57]:
# Checkpoint the stage-3 weights.
learn.save('res152_stg3_12345')

In [59]:
# Stage 4: unfreeze the model and fine-tune at 210px with learning rates in
# the range 2e-6 to 2e-5.
# NOTE(review): 12 epochs are requested here, but the recorded output lists
# only 10 - confirm which setting produced the submitted result.
learn.unfreeze()
learn.fit_one_cycle(12, max_lr=slice(2e-6,2e-5))

epoch,train_loss,valid_loss,accuracy,time
0,0.071516,0.145105,0.9512,01:37
1,0.058395,0.135555,0.956,01:37
2,0.053564,0.151212,0.9536,01:37
3,0.041281,0.146875,0.9536,01:37
4,0.04635,0.141313,0.9544,01:37
5,0.047478,0.148747,0.9512,01:37
6,0.03712,0.147765,0.9488,01:37
7,0.029996,0.141482,0.9512,01:37
8,0.022233,0.14329,0.9496,01:37
9,0.029539,0.139367,0.9488,01:37


In [65]:
%%time
# Predict on the test set with the seed-12345 ResNet-152 model. Only
# preds_12345 is used by the later cells shown in this notebook.
preds_12345,y_12345 = learn.get_preds(ds_type=DatasetType.Test)

CPU times: user 4.23 s, sys: 3.22 s, total: 7.45 s
Wall time: 13.3 s


**seed = 555 - densenet 201**

In [66]:
# Seed every random number generator in play for this run. Besides NumPy and
# the PyTorch CPU generator, Python's `random` and the CUDA generators are
# seeded too, and cuDNN autotuning is switched off, since it can select
# different kernels from run to run.
seed = 555
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [67]:
%%time
data = (ImageList.from_csv(path=path, 
                              csv_name='train.csv',
                              folder='images'
                             )
        .split_by_rand_pct(0.2, seed=555)
        .label_from_df(cols='category')
        .add_test_folder("../../working/test")
        .transform(get_transforms(), size=128)
        .databunch(bs=32)
        .normalize(imagenet_stats))

CPU times: user 208 ms, sys: 12 ms, total: 220 ms
Wall time: 225 ms


In [68]:
# Sanity-check the DataBunch: class count, class labels, then the sizes of the
# train, validation and test sets.
for value in (data.c, data.classes,
              len(data.train_ds), len(data.valid_ds), len(data.test_ds)):
    print(value)

5
[1, 2, 3, 4, 5]
5002
1250
2680


In [69]:
%%time
# Create a learner around a pretrained DenseNet-201 that reports accuracy.
learn = cnn_learner(data, densenet201, pretrained=True, metrics = accuracy,model_dir='../../working/')

Downloading: "https://download.pytorch.org/models/densenet201-c1103571.pth" to /tmp/.torch/models/densenet201-c1103571.pth
81131730it [00:01, 41250465.90it/s]


CPU times: user 1.59 s, sys: 280 ms, total: 1.87 s
Wall time: 3.73 s


In [70]:
# Stage 1: train for 4 epochs with the one-cycle schedule, before unfreezing.
learn.fit_one_cycle(4)

epoch,train_loss,valid_loss,accuracy,time
0,0.784089,0.525924,0.7992,00:36
1,0.478887,0.352801,0.8656,00:35
2,0.374736,0.302911,0.8888,00:36
3,0.29867,0.282741,0.8928,00:35


In [71]:
# Checkpoint the stage-1 weights.
learn.save('d201_stg1_555')

In [73]:
# Stage 2: unfreeze the model and keep training with learning rates in the
# range 5e-5 to 1e-3.
# NOTE(review): 12 epochs are requested here, but the recorded output lists
# only 10 - confirm which setting produced the submitted result.
learn.unfreeze()
learn.fit_one_cycle(12, max_lr=slice(5e-5,1e-3))

epoch,train_loss,valid_loss,accuracy,time
0,0.265038,0.25516,0.9048,00:43
1,0.281112,0.295027,0.8928,00:42
2,0.281579,0.257174,0.9,00:42
3,0.240531,0.256081,0.9064,00:42
4,0.196472,0.256594,0.9072,00:42
5,0.168094,0.229469,0.9216,00:43
6,0.128383,0.193671,0.9352,00:42
7,0.087685,0.189868,0.9304,00:43
8,0.059109,0.176396,0.944,00:42
9,0.034631,0.160573,0.9488,00:42


In [74]:
# Checkpoint the stage-2 weights; they are reloaded for the larger image size.
learn.save('d201_stg2_555')

In [80]:
%%time
data2 = (ImageList.from_csv(path=path, 
                              csv_name='train.csv',
                              folder='images'
                             )
        .split_by_rand_pct(0.2, seed=555)
        .label_from_df(cols='category')
        .add_test_folder("../../working/test")
        .transform(get_transforms(), size=210)
        .databunch(bs=32)
        .normalize(imagenet_stats))

CPU times: user 368 ms, sys: 16 ms, total: 384 ms
Wall time: 390 ms


In [81]:
%%time
# Create a new DenseNet-201 learner on the 210px data and restore the stage-2
# weights trained at 128px.
learn = cnn_learner(data2, densenet201, pretrained=True, metrics = accuracy,model_dir='../../working/')
learn.load('d201_stg2_555')

CPU times: user 2.19 s, sys: 352 ms, total: 2.54 s
Wall time: 2.54 s


In [82]:
# Stage 3: train for 4 epochs at the 210px image size, before unfreezing.
learn.fit_one_cycle(4)

epoch,train_loss,valid_loss,accuracy,time
0,0.104153,0.146969,0.9544,01:01
1,0.072977,0.127166,0.9528,00:58
2,0.044679,0.118695,0.9616,00:57
3,0.028871,0.12765,0.9608,00:57


In [83]:
# Checkpoint the stage-3 weights.
learn.save('d201_stg3_555')

In [86]:
# Stage 4: unfreeze the model and fine-tune at 210px with learning rates in
# the range 5e-6 to 3e-5.
# NOTE(review): 12 epochs are requested here, but the recorded output lists
# only 10 - confirm which setting produced the submitted result.
learn.unfreeze()
learn.fit_one_cycle(12, max_lr=slice(5e-6,3e-5))

epoch,train_loss,valid_loss,accuracy,time
0,0.035123,0.128852,0.9592,01:13
1,0.030292,0.124718,0.9624,01:12
2,0.030979,0.124429,0.9576,01:11
3,0.027608,0.123051,0.9632,01:11
4,0.021081,0.129314,0.968,01:11
5,0.035194,0.123586,0.9632,01:12
6,0.021561,0.120666,0.9616,01:11
7,0.024442,0.120267,0.9664,01:11
8,0.015117,0.119429,0.9624,01:11
9,0.023678,0.119406,0.9624,01:11


In [94]:
%%time
# Predict on the test set with the seed-555 DenseNet-201 model. Only
# preds_555 is used by the later cells shown in this notebook.
preds_555,y_555 = learn.get_preds(ds_type=DatasetType.Test)

CPU times: user 5.14 s, sys: 1.28 s, total: 6.41 s
Wall time: 13.5 s


**Ensemble**

In [95]:
# Ensemble: unweighted mean of the three models' test-set predictions.
final_preds = (preds_2019 + preds_12345 + preds_555)/3.

In [96]:
# Take the index of the highest averaged score in each row (axis 1).
final_preds_class = np.argmax(final_preds, axis = 1)

In [97]:
# Shift 0-based indices to category labels; this relies on data.classes being
# [1, 2, 3, 4, 5], as printed earlier in the notebook.
final_preds_class = final_preds_class + 1

In [98]:
# Build the submission file. File names are taken from the learner's test
# dataset so that each row lines up with the prediction at the same index.
# os.listdir() returns entries in arbitrary order and may list files that the
# test set skipped, so it is not a safe source for the names.
# NOTE(review): this assumes all three ensembled models enumerated the test
# folder in the same order - confirm.
test_files = [os.path.basename(str(item)) for item in learn.data.test_ds.x.items]
assert len(test_files) == len(final_preds_class), \
    "number of test files does not match number of predictions"
submission = pd.DataFrame({ 'image': test_files, 'category': final_preds_class })
submission.to_csv('sub_012.csv', index=False)

In [99]:
# Preview the first rows of the submission.
submission.head()

Unnamed: 0,image,category
0,2827259.jpg,5
1,2878764.jpg,3
2,2888455.jpg,1
3,2886481.jpg,1
4,2780682.jpg,2
