In [1]:
import numpy as np
import pandas as pd
import gc
from tqdm import tqdm

from tensorflow import keras

from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import sys
sys.path.append('..')
from data import test_df, submit_test_gen, BATCH_SIZE, MASK_BATCH_SIZE
from data import Mask_DataGenerator
from data import rle2mask, mask2rle, build_rles

Found 7540 validated image filenames.
Found 2514 validated image filenames.
Found 2514 validated image filenames.
Found 5506 validated image filenames.


# I. Load Test Data

In [2]:
test_df=pd.read_csv('../data/sample_submission.csv')
test_df.head()

Unnamed: 0,ImageId,EncodedPixels,ClassId
0,0000f269f.jpg,1 409600,0
1,000ccc2ac.jpg,1 409600,0
2,002451917.jpg,1 409600,0
3,003c5da97.jpg,1 409600,0
4,0042e163f.jpg,1 409600,0


In [3]:
# keep only the ImageId and we will
# predict if there is defects and 
# which kind(s) of defects it is

test_df=test_df[['ImageId']]

In [4]:
test_df.head()

Unnamed: 0,ImageId
0,0000f269f.jpg
1,000ccc2ac.jpg
2,002451917.jpg
3,003c5da97.jpg
4,0042e163f.jpg


# II. Predict if there is defect (Binary predictions)

(using the Dense Network)

In [None]:
# load the DenseNet model for predicting if it has defect or not
densenet_model=load_model('../1a_DenseNet_Kaiming/CNNDenseNet2classes.h5')
compute_steps_per_epoch=lambda x: int(np.ceil(x/BATCH_SIZE))

step_size_test=compute_steps_per_epoch(test_df.shape[0])
print('predicting...')
submit_test=densenet_model.predict(
    submit_test_gen,
    steps=step_size_test,
    verbose=1)

test_df['defect_label']=(submit_test>0.5).astype('int32')
test_df.head()
test_df.defect_label.value_counts()

predicting...

In [None]:
# mark down which images have defects
# so that we can further classify what kind of defects those are

test_df_defect=test_df[test_df['defect_label']==1].copy()

test_df_noDefect=test_df[test_df['defect_label']==0].copy()

# III. Further classify the types of the predicted defects
(using U-Net model)

In [7]:
# for the metrics in the model
def dice_coef(y_true,y_pred,smooth=1):
    
    y_true_f=K.flatten(y_true)
    y_pred_f=K.flatten(y_pred)
    
    intersection=K.sum(y_true_f*y_pred_f)
    
    return (2*intersection+smooth)/(K.sum(y_true_f)+K.sum(y_pred_f)+smooth)

In [8]:
U_net_model=load_model('../2_U-net_Kaiming/Segmentation_Unet.h520210927',
                       custom_objects={'dice_coef': dice_coef})

df=[] # for collecting predicted images
step=300


for i in range(0, test_df_defect.shape[0], step):
    batch_idx=list(range(i,min(test_df_defect.shape[0],i+step)))

    mask_test_gen=Mask_DataGenerator(
        batch_idx,
        df=test_df_defect,
        target_df=test_df_defect,
        mode='predict',
        base_path='../data/test_images',
        batch_size=1,
        shuffle=False
    )

    batch_pred_masks=U_net_model.predict(mask_test_gen,verbose=1)

    for j, b in tqdm(enumerate(batch_idx)):
        filename=test_df_defect['ImageId'].iloc[b]
        data={'ImageId':[filename,filename,filename,filename],
              'ClassId':[1,2,3,4],
              'EncodedPixels':['','','','']}
        image_df=pd.DataFrame(
            data,columns=['ImageId','ClassId','EncodedPixels'])
        pred_masks=batch_pred_masks[j,].round().astype(int)
        pred_rles=build_rles(pred_masks)

        for ii in range(4):
            image_df.loc[ii,'EncodedPixels']=pred_rles[ii]

        df.append(image_df)

    gc.collect()
    



300it [00:17, 17.55it/s]




300it [00:17, 17.61it/s]




300it [00:17, 17.57it/s]




300it [00:17, 17.63it/s]




300it [00:17, 17.47it/s]




300it [00:16, 17.86it/s]




300it [00:16, 17.93it/s]




300it [00:16, 18.05it/s]




300it [00:16, 17.84it/s]




136it [00:07, 17.97it/s]


# IV. Submission

In [9]:
df=pd.concat(df)
print(df.shape)
df.head(20)

(11344, 3)


Unnamed: 0,ImageId,ClassId,EncodedPixels
0,000ccc2ac.jpg,1,
1,000ccc2ac.jpg,2,
2,000ccc2ac.jpg,3,
3,000ccc2ac.jpg,4,
0,002451917.jpg,1,
1,002451917.jpg,2,
2,002451917.jpg,3,30342 1 30344 119 30469 5 30475 246 30724 253 ...
3,002451917.jpg,4,
0,003c5da97.jpg,1,
1,003c5da97.jpg,2,


In [10]:
# combine results from predicted masks with other images
tt=[]

for img in test_df_noDefect.index:
    image_df=pd.DataFrame(columns=['ImageId','ClassId','EncodedPixels'])
    for ii in range(4): # ii is the class of defect
        image_df.loc[ii,'EncodedPixels']=np.nan
        image_df.loc[ii,'ClassId']=ii+1
        image_df.loc[ii,'ImageId']=test_df_noDefect.loc[img,'ImageId']
    tt.append(image_df)

tt=pd.concat(tt)
print(tt.shape)


(10680, 3)


In [11]:
final_submission_df=pd.concat([df,tt])
print(final_submission_df.shape)
final_submission_df.head()

(22024, 3)


Unnamed: 0,ImageId,ClassId,EncodedPixels
0,000ccc2ac.jpg,1,
1,000ccc2ac.jpg,2,
2,000ccc2ac.jpg,3,
3,000ccc2ac.jpg,4,
0,002451917.jpg,1,


In [12]:
final_submission_df['EndcodedPixels']=final_submission_df['EncodedPixels'].apply(
    lambda x: np.nan if x=='' else x)

final_submission_df['ClassId']=final_submission_df['ClassId'].astype(str)
final_submission_df['ImageId_ClassId']=final_submission_df['ImageId']+'_'+final_submission_df['ClassId']

final_submission_df.head()


Unnamed: 0,ImageId,ClassId,EncodedPixels,EndcodedPixels,ImageId_ClassId
0,000ccc2ac.jpg,1,,,000ccc2ac.jpg_1
1,000ccc2ac.jpg,2,,,000ccc2ac.jpg_2
2,000ccc2ac.jpg,3,,,000ccc2ac.jpg_3
3,000ccc2ac.jpg,4,,,000ccc2ac.jpg_4
0,002451917.jpg,1,,,002451917.jpg_1


In [13]:
final_submission_df[['ImageId_ClassId','EncodedPixels']].to_csv(
    'csvfiles/DesNet_submission.csv',index=False)
