In [2]:
import os
import json

import cv2
import numpy as np
import pandas as pd
import keras
from keras import layers
from keras.applications import DenseNet121
from keras.callbacks import Callback,ModelCheckpoint,ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.optimizers import Adam,Nadam
import tensorflow as tf
from tqdm import tqdm

Using TensorFlow backend.


In [4]:
# Load the training annotations: one row per (image, defect class) pair.
train_df=pd.read_csv('./input/train.csv')
print(train_df.shape)
# The stray trailing "s" after head() was a syntax error and has been removed.
train_df.head()

(50272, 2)


Unnamed: 0,ImageId_ClassId,EncodedPixels
0,0002cc93b.jpg_1,29102 12 29346 24 29602 24 29858 24 30114 24 3...
1,0002cc93b.jpg_2,
2,0002cc93b.jpg_3,
3,0002cc93b.jpg_4,
4,00031f466.jpg_1,


In [5]:
# Read the sample submission file; the test image list is derived from its ids.
submission_df = pd.read_csv('./input/sample_submission.csv')
print(submission_df.shape)
submission_df.head()

(7204, 2)


Unnamed: 0,ImageId_ClassId,EncodedPixels
0,004f40c73.jpg_1,1 1
1,004f40c73.jpg_2,1 1
2,004f40c73.jpg_3,1 1
3,004f40c73.jpg_4,1 1
4,006f39c41.jpg_1,1 1


In [6]:
# Keep the text before the first underscore of every id (the image file name),
# then reduce the result to its distinct values.
test_image_ids = submission_df['ImageId_ClassId'].map(
    lambda image_class: image_class.split('_')[0]
)
unique_test_images = test_image_ids.unique()
unique_test_images

array(['004f40c73.jpg', '006f39c41.jpg', '00b7fb703.jpg', ...,
       'ffbf79783.jpg', 'ffc9a6187.jpg', 'ffdb60677.jpg'], dtype=object)

In [7]:
# Flag rows without a mask and extract the image file name from the combined id.
train_df['isNan'] = train_df['EncodedPixels'].isna()
train_df['ImageId'] = train_df['ImageId_ClassId'].map(
    lambda image_class: image_class.split('_')[0]
)
train_df.head()

Unnamed: 0,ImageId_ClassId,EncodedPixels,isNan,ImageId
0,0002cc93b.jpg_1,29102 12 29346 24 29602 24 29858 24 30114 24 3...,False,0002cc93b.jpg
1,0002cc93b.jpg_2,,True,0002cc93b.jpg
2,0002cc93b.jpg_3,,True,0002cc93b.jpg
3,0002cc93b.jpg_4,,True,0002cc93b.jpg
4,00031f466.jpg_1,,True,00031f466.jpg


In [8]:
# Count, for every image, how many of its rows have no mask.
# Only `isNan` is aggregated: summing the whole frame would also try to "sum"
# the string columns (ImageId_ClassId, EncodedPixels), which either concatenates
# text or fails depending on the pandas version.
train_nan_df=(train_df.groupby('ImageId')['isNan']
              .sum()
              .astype(np.int32)
              .rename('missingCount')
              .reset_index())
# An image whose four rows are all empty is flagged with allMissing=1.
train_nan_df['allMissing']=(train_nan_df['missingCount']==4).astype(int)

train_nan_df.head()

Unnamed: 0,ImageId,missingCount,allMissing
0,0002cc93b.jpg,3,0
1,00031f466.jpg,4,1
2,000418bfc.jpg,4,1
3,000789191.jpg,4,1
4,0007a71bf.jpg,3,0


In [9]:
# One row per distinct test image; predictions are attached to this frame later.
test_nan_df = pd.DataFrame({'ImageId': unique_test_images})
print(test_nan_df.shape)
test_nan_df.head()

(1801, 1)


Unnamed: 0,ImageId
0,004f40c73.jpg
1,006f39c41.jpg
2,00b7fb703.jpg
3,00bbcd9af.jpg
4,0108ce457.jpg


In [10]:
# Distribution of the per-image count of empty masks: plot first, then the table.
missing_counts = train_nan_df['missingCount']
missing_counts.hist()
missing_counts.value_counts()

3    6239
4    5902
2     425
1       2
Name: missingCount, dtype: int64

In [11]:
def load_img(code,base,resize=True):
    """Read an image from disk in RGB channel order, optionally resized.

    Parameters
    ----------
    code : str
        File name of the image, relative to ``base``.
    base : str
        Directory that contains the image.
    resize : bool, default True
        When true, the image is resized to 256x256 pixels.

    Returns
    -------
    The image array produced by OpenCV, converted from BGR to RGB.

    Raises
    ------
    FileNotFoundError
        If OpenCV could not read the file (missing or undecodable).
    """
    path=f'{base}/{code}'
    img=cv2.imread(path)
    # cv2.imread reports failure by returning None rather than raising; stop
    # here with the offending path instead of failing later inside cvtColor.
    if img is None:
        raise FileNotFoundError(f'Could not read image: {path}')
    img=cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
    if resize:
        img=cv2.resize(img,(256,256))

    return img

def validate_path(path):
    """Make sure the directory ``path`` exists, creating parents as needed.

    Parameters
    ----------
    path : str
        Directory to create if it is missing.

    Raises
    ------
    FileExistsError
        If ``path`` already exists but is not a directory.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(path,exist_ok=True)

In [12]:
train_path='./tmp/train'
validate_path(train_path)

# Cache a resized PNG copy of every training image under train_path.
for code in tqdm(train_nan_df['ImageId']):
    img=load_img(code,base='./input/train_images')
    path=code.replace('.jpg','')
    target=f'{train_path}/{path}.png'
    # load_img returns RGB, but cv2.imwrite interprets arrays as BGR. Convert
    # back before writing, otherwise the cached PNGs have red and blue swapped
    # relative to the test images that are read in true RGB.
    written=cv2.imwrite(target,cv2.cvtColor(img,cv2.COLOR_RGB2BGR))
    # imwrite returns False instead of raising when the file cannot be written.
    if not written:
        raise IOError(f'Could not write image: {target}')

  1%|          | 140/12568 [00:06<09:50, 21.05it/s]


KeyboardInterrupt: 

In [17]:
# Point the ids at the cached PNG copies instead of the original JPEG files.
train_nan_df['ImageId'] = train_nan_df['ImageId'].str.replace('.jpg', '.png', regex=False)

In [25]:
# Number of images per batch; used by both the train/validation and test flows.
BATCH_SIZE=32

def create_datagen():
    """Return the augmenting ``ImageDataGenerator`` used for training and TTA.

    The generator rescales pixels by 1/255, applies random zoom, rotation,
    shifts and flips, fills exposed areas with the constant 0, and reserves
    15% of the data as the validation split.
    """
    augmentation=dict(
        rescale=1/255.,
        zoom_range=0.1,
        rotation_range=10,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True,
        vertical_flip=True,
        fill_mode='constant',
        cval=0.,
        validation_split=0.15,
    )
    return ImageDataGenerator(**augmentation)

def create_test_gen():
    """Return an unshuffled, label-free flow over the test images.

    Images listed in ``test_nan_df['ImageId']`` are read from
    ``./input/test_images/``, rescaled by 1/255 and delivered at 256x256 in
    batches of ``BATCH_SIZE``. No augmentation is configured.
    """
    test_datagen=ImageDataGenerator(rescale=1/255.)
    flow_options={
        'directory':'./input/test_images/',
        'x_col':'ImageId',
        'class_mode':None,
        'target_size':(256,256),
        'batch_size':BATCH_SIZE,
        'shuffle':False,
    }
    return test_datagen.flow_from_dataframe(test_nan_df,**flow_options)

def create_flow(datagen,subset):
    """Return a flow over the cached training PNGs for one split.

    Parameters
    ----------
    datagen
        Generator object providing ``flow_from_dataframe``.
    subset
        Split name forwarded unchanged as the ``subset`` argument.

    Returns
    -------
    Whatever ``datagen.flow_from_dataframe`` returns for ``train_nan_df``,
    with ``ImageId`` as the file column and ``allMissing`` as the target.
    """
    flow_options={
        'directory':'./tmp/train',
        'x_col':'ImageId',
        'y_col':'allMissing',
        'class_mode':'other',
        'target_size':(256,256),
        'batch_size':BATCH_SIZE,
        'subset':subset,
    }
    return datagen.flow_from_dataframe(train_nan_df,**flow_options)

# One augmenting generator feeds both splits; the test flow is built separately.
data_generator = create_datagen()
train_gen, val_gen = (
    create_flow(data_generator, subset_name)
    for subset_name in ('training', 'validation')
)
test_gen = create_test_gen()

Found 10683 validated image filenames.
Found 1885 validated image filenames.
Found 1801 validated image filenames.


In [13]:
def build_model():
    """Assemble and compile the DenseNet121-based binary classifier.

    A headless DenseNet121 (256x256x3 input, weights read from the local .h5
    file) is followed by global average pooling, batch normalisation and
    dropout, a 512-unit ReLU layer with batch normalisation and dropout, and a
    single sigmoid output. The model is compiled with binary cross-entropy,
    a default-configured Nadam optimizer and the accuracy metric.

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    backbone=DenseNet121(include_top=False,
                         input_shape=(256,256,3),
                         weights='./input/weight/DenseNet-BC-121-32-no-top.h5')

    # Layers in the order they are stacked, backbone first.
    stack=(
        backbone,
        layers.GlobalAveragePooling2D(),
        layers.BatchNormalization(),
        layers.Dropout(0.5),
        layers.Dense(512,activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.5),
        layers.Dense(1,activation='sigmoid'),
    )

    model=Sequential()
    for layer in stack:
        model.add(layer)

    model.compile(loss='binary_crossentropy',
                  optimizer=Nadam(),
                  metrics=['accuracy'])

    return model

In [14]:
# Build the compiled classifier and print its layer-by-layer summary.
model=build_model()
model.summary()

W0915 22:47:01.676863 140461510276864 deprecation_wrapper.py:119] From /home/tyonetrap/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0915 22:47:01.822533 140461510276864 deprecation_wrapper.py:119] From /home/tyonetrap/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0915 22:47:01.892099 140461510276864 deprecation_wrapper.py:119] From /home/tyonetrap/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0915 22:47:02.008172 140461510276864 deprecation_wrapper.py:119] From /home/tyonetrap/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.ge

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
densenet121 (Model)          (None, 8, 8, 1024)        7037504   
_________________________________________________________________
global_average_pooling2d_1 ( (None, 1024)              0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 1024)              4096      
_________________________________________________________________
dropout_1 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               524800    
_________________________________________________________________
batch_normalization_2 (Batch (None, 512)               2048      
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
__________

In [41]:
# The checkpoint callback writes into ./output; create it up front so the first
# save cannot fail on a missing directory.
os.makedirs('./output',exist_ok=True)

# Keep only the model with the best validation accuracy seen so far.
checkpoint=ModelCheckpoint('./output/model.h5',
                          monitor='val_acc',
                          verbose=1,
                          save_best_only=True,
                          save_weights_only=False,
                          mode='auto')

# Lower the learning rate after 5 epochs without validation-loss improvement.
reduce_lr=ReduceLROnPlateau(monitor='val_loss',
                           patience=5,
                           verbose=1,
                           min_lr=1e-6)

# Step counts must be integers. len() of a Keras iterator is its number of
# batches per pass, which replaces the former fractional
# (rows / BATCH_SIZE) * split estimates.
history=model.fit_generator(train_gen,
                           steps_per_epoch=len(train_gen),
                           validation_data=val_gen,
                           validation_steps=len(val_gen),
                           epochs=40,
                           callbacks=[checkpoint,reduce_lr])

Epoch 1/40

Epoch 00001: val_acc improved from -inf to 0.64085, saving model to ./output/model.h5
Epoch 2/40

Epoch 00002: val_acc did not improve from 0.64085
Epoch 3/40

Epoch 00003: val_acc improved from 0.64085 to 0.78727, saving model to ./output/model.h5
Epoch 4/40

Epoch 00004: val_acc improved from 0.78727 to 0.81061, saving model to ./output/model.h5
Epoch 5/40

Epoch 00005: val_acc did not improve from 0.81061
Epoch 6/40

Epoch 00006: val_acc did not improve from 0.81061
Epoch 7/40

Epoch 00007: val_acc did not improve from 0.81061
Epoch 8/40

Epoch 00008: val_acc did not improve from 0.81061
Epoch 9/40

Epoch 00009: val_acc did not improve from 0.81061

Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 10/40

Epoch 00010: val_acc improved from 0.81061 to 0.83926, saving model to ./output/model.h5
Epoch 11/40

Epoch 00011: val_acc improved from 0.83926 to 0.93103, saving model to ./output/model.h5
Epoch 12/40

Epoch 00012: val_acc did not 

In [3]:
# Plot the training curves. Requires `history` from the training cell to be
# present in the current kernel session.
history_df=pd.DataFrame(history.history)
history_df[['loss','val_loss']].plot()
# 'acc_loss' is not a recorded metric; the validation counterpart of 'acc' is
# 'val_acc' (the same key the checkpoint callback monitors).
history_df[['acc','val_acc']].plot()

NameError: name 'history' is not defined

In [28]:
def tta_prediction(datagen,model,image,n_examples):
    """Average a model's predictions over generator-produced variants of one image.

    Parameters
    ----------
    datagen
        Object whose ``flow`` method yields batches from an array of samples.
    model
        Object providing ``predict_generator``.
    image
        A single image; a leading batch axis of length 1 is added before it
        is handed to ``datagen.flow``.
    n_examples : int
        Number of generator steps to predict on, and the divisor of the sum.

    Returns
    -------
    The predictions summed over the first axis and divided by ``n_examples``.
    """
    single_batch=np.asanyarray(image)[np.newaxis,...]
    augmented=datagen.flow(single_batch,batch_size=n_examples)
    predictions=model.predict_generator(augmented,steps=n_examples,verbose=0)
    return np.sum(predictions,axis=0)/n_examples

In [35]:
# Restore the best checkpoint written during training.
model.load_weights('./output/model.h5')

# The augmentation settings never change, so build the generator once rather
# than once per image.
tta_datagen=create_datagen()

# One prediction per test image. The shape is derived from the row count only:
# sizing it from test_nan_df.shape would add a column once `allMissing` has
# been attached, which breaks the assignment when this cell is re-run.
y_test=np.empty((len(test_nan_df),1))
for i,code in enumerate(tqdm(test_nan_df['ImageId'])):
    y_test[i]=tta_prediction(datagen=tta_datagen,
                            model=model,
                            image=load_img(base='./input/test_images',code=code),
                            n_examples=20)

100%|██████████| 1801/1801 [22:31<00:00,  1.32it/s]


In [36]:
# Attach the per-image predictions held in y_test to the test frame.
test_nan_df['allMissing']=y_test

In [38]:
# Write both frames to CSV without the index column.
for frame, csv_name in (
    (train_nan_df, 'train_missing_count.csv'),
    (test_nan_df, 'test_missing_count.csv'),
):
    frame.to_csv(csv_name, index=False)