# Predicting Cloud Masks, Using a U-Net

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import cv2
import sys
from keras.models import Sequential, Model
from keras.layers import Dense, Conv2D, Input, MaxPool2D, UpSampling2D, Concatenate, Conv2DTranspose
import tensorflow as tf
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint,ReduceLROnPlateau,CSVLogger
from keras import backend as K
import keras
from time import gmtime, strftime

Using TensorFlow backend.


In [2]:
# Put the local ml_utils checkout on the import path; presumably this is where
# the `models` and `metrics` modules imported below live.
# NOTE(review): machine-specific absolute path - adjust before running elsewhere.
sys.path.append('/home/james/GITHUB/ml_utils')

# Project-local helpers: the U-Net builder class and the module holding dice_coef.
from models import Unet
from metrics import dice

In [3]:
# UTC timestamp (gmtime) taken once when this cell runs; later cells embed it
# in the checkpoint and training-log file names so each run gets its own files.
# NOTE(review): the ':' separators end up in file names, which some
# filesystems do not accept - confirm before running on another platform.
date = strftime("%Y%m%d:%H:%M:%S", gmtime())
print(date)

20190904:23:14:24


In [4]:
# Dataset root; every other location is derived from it. Directory paths keep
# their trailing "/" because later cells append file names by concatenation.
img_dir = "/home/james/GITHUB/Kaggle/data/Clouds/"
models_dir = f"{img_dir}models/"
train_dir = f"{img_dir}train/"
test_dir = f"{img_dir}test/"
labels_file = f"{img_dir}train.csv"

In [5]:
# Size the images are resized to before entering the network, plus the number
# of colour channels; together these form the model's input shape.
height, width, channels = 256, 256, 3

# Number of (image, mask) pairs per training batch.
batch_size = 12

The labels are stored in a 204 MB CSV file.

In [6]:
# Read the label table, then keep only the rows without missing values,
# printing the table shape before and after the filter.
labelsdf = pd.read_csv(labels_file)
print(labelsdf.shape)
labelsdf = labelsdf.dropna()
print(labelsdf.shape)

(22184, 2)
(11836, 2)


The format of the labels is a little messy and hard to use programmatically: each label has the form "filename_cloudtype". The labels will be easier to examine once the filename and the cloud type are split into separate columns.

In [7]:
# Split "filename_cloudtype" once, at the first underscore, into two columns.
new = labelsdf["Image_Label"].str.split("_", n=1, expand=True)

# Assemble the reorganized table in one step; the row index of labelsdf is kept.
df = pd.DataFrame({
    'filename': new[0],
    'type': new[1],
    'EncodedPixels': labelsdf['EncodedPixels'],
})
df.head()

Unnamed: 0,filename,type,EncodedPixels
0,0011165.jpg,Fish,264918 937 266318 937 267718 937 269118 937 27...
1,0011165.jpg,Flower,1355565 1002 1356965 1002 1358365 1002 1359765...
4,002be4f.jpg,Fish,233813 878 235213 878 236613 878 238010 881 23...
5,002be4f.jpg,Flower,1339279 519 1340679 519 1342079 519 1343479 51...
7,002be4f.jpg,Sugar,67495 350 68895 350 70295 350 71695 350 73095 ...


In [8]:
def rle_to_mask(rle_string, height, width):
    """Decode a run-length-encoded mask into a 3-channel uint8 image.

    The encoding is a whitespace-separated list of ``start length`` pairs.
    Start positions are 1-based and index the image flattened column by
    column (column-major order). Encoded pixels are set to 255, all others
    stay 0.

    Parameters
    ----------
    rle_string : str or int
        The encoded mask. The value ``-1`` is the "no mask" sentinel; an
        empty or whitespace-only string also decodes to an empty mask.
    height, width : int
        Size of the decoded mask in pixels.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(height, width, 3)`` and dtype ``uint8`` with the
        single-channel mask repeated on the last axis. The sentinel case
        returns an all-zero array of the same shape and dtype, so callers can
        treat every result uniformly (it previously returned a 2-D float
        array).

    Raises
    ------
    ValueError
        If a token is not an integer or the numbers do not form complete
        ``start length`` pairs.
    """
    flat_mask = np.zeros(height * width, dtype=np.uint8)

    if rle_string != -1:
        # split() without an argument tolerates repeated, leading and trailing
        # whitespace, and yields no tokens at all for an empty string.
        rle_numbers = [int(token) for token in rle_string.split()]
        if len(rle_numbers) % 2:
            raise ValueError(
                "RLE string must contain complete 'start length' pairs"
            )
        for start, length in zip(rle_numbers[0::2], rle_numbers[1::2]):
            begin = start - 1  # encoded positions are 1-based
            flat_mask[begin:begin + length] = 255

    # The runs index a column-major flattening: lay the data out as
    # (width, height) and transpose to get the usual (height, width) image.
    mask = flat_mask.reshape(width, height).T
    return np.stack((mask,) * 3, axis=-1)

In [9]:
# Input shape handed to the U-Net builder, ordered (height, width, channels).
shape=(height,width,channels)
# Unet is the project-local class imported from `models`; build_model() returns
# the model object that the later cells compile and train.
unet = Unet(shape)
model = unet.build_model()
# Print the layer-by-layer summary of the network.
model.summary()

<class 'tuple'> (256, 256, 3)
Instructions for updating:
Colocations handled automatically by placer.
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 256, 256, 3)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 256, 256, 64) 1792        input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 256, 256, 64) 36928       conv2d_1[0][0]                   
__________________________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D)  (None, 128, 128, 64) 0           conv2d_2[0][0]                   
_______

In [10]:
class CloudsImageReader(keras.utils.Sequence):
    """Keras ``Sequence`` that yields (image, mask) batches for training.

    Each row of the label table names an image file and carries the
    run-length-encoded mask for it. A batch is built by reading the images
    from disk, decoding the masks at the original image size, resizing both
    to the requested size and scaling pixel values to the range [0, 1].

    Parameters
    ----------
    width, height : int
        Size every image and mask is resized to.
    batch_size : int
        Number of samples per batch.
    df : pandas.DataFrame
        Label table with ``filename`` and ``EncodedPixels`` columns. Rows are
        addressed by position, so the index does not need to be contiguous.
    image_dir : str, optional
        Directory prefix the file names are appended to (must end with a path
        separator). Defaults to the notebook-level ``train_dir``.
    """

    def __init__(self, width, height, batch_size, df, image_dir=None):
        self.width = width
        self.height = height
        self.batch_size = batch_size
        self.df = df
        # Resolve the directory once so batches do not depend on whatever the
        # notebook-level variable holds later on.
        self.image_dir = train_dir if image_dir is None else image_dir
        self.on_epoch_end()

    def __len__(self):
        """Number of full batches per epoch; a trailing partial batch is dropped."""
        return self.df.shape[0] // self.batch_size

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        # Plain positional order; __getitem__ addresses rows directly by
        # position and does not consult this array.
        self.indexes = np.arange(0, self.df.shape[0])

    def __getitem__(self, index):
        """Build batch number ``index``.

        Returns
        -------
        tuple of numpy.ndarray
            ``x`` with shape ``(batch_size, height, width, 3)`` and ``y`` with
            shape ``(batch_size, height, width, 1)``, both float32 in [0, 1].

        Raises
        ------
        OSError
            If an image file cannot be read.
        """
        start = index * self.batch_size
        end = (index + 1) * self.batch_size
        rows = np.arange(start, end)

        # Arrays are laid out (rows, cols) = (height, width) to match both the
        # output of cv2.resize and the model's (height, width, channels) input.
        # float32 halves the memory of every queued batch compared with the
        # numpy default of float64.
        x = np.zeros((len(rows), self.height, self.width, 3), dtype=np.float32)
        y = np.zeros((len(rows), self.height, self.width, 1), dtype=np.float32)

        for slot, row in enumerate(rows):
            path = self.image_dir + self.df['filename'].iloc[row]
            image = cv2.imread(path, 1)
            if image is None:
                # cv2.imread signals failure by returning None instead of raising.
                raise OSError("could not read image: " + path)

            # Decode the mask at the original image size, then collapse the
            # three identical channels into one.
            mask = rle_to_mask(self.df['EncodedPixels'].iloc[row],
                               image.shape[0], image.shape[1])
            mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)

            # cv2.resize takes the target size as (width, height).
            image = cv2.resize(image, (self.width, self.height))
            mask = cv2.resize(mask, (self.width, self.height))

            x[slot] = image / 255
            # Add the trailing channel axis expected for the target tensor.
            y[slot] = mask[..., np.newaxis] / 255

        return x, y

In [11]:
# Best-so-far weights for this run are stored under models_dir, tagged with
# the run timestamp.
filepath = models_dir + date + "-weights.hdf5"

# Overwrite the checkpoint only when the training dice coefficient reaches a
# new maximum.
checkpoint = ModelCheckpoint(filepath,
                             monitor="dice_coef",
                             verbose=1,
                             save_best_only=True,
                             mode='max')

# Per-epoch metrics are appended to a CSV log next to the weights.
csv_logger = CSVLogger(models_dir + date + '-training.log')

# Halve the learning rate after 6 epochs without improvement in the loss.
# The optimizer starts at 1e-4 (see the compile cell); the previous
# min_lr=0.001 was above that starting rate, and the callback only acts while
# the current rate is greater than min_lr, so it could never reduce anything.
# The previous factor=0.005 would also have cut the rate 200-fold in a single
# step; 0.5 allows several gradual reductions before the floor is reached.
reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.5,
                              patience=6, min_lr=1e-6)

callbacks_list = [checkpoint,reduce_lr,csv_logger]

In [13]:
# Configure training: Adam optimizer created with 1e-4 as its first argument
# (the learning rate), binary cross-entropy as the loss, and the project's
# dice coefficient reported as a metric. The checkpoint callback monitors that
# metric under the name "dice_coef".
model.compile(
            optimizer=Adam(1e-4), 
            loss='binary_crossentropy', 
            metrics=[dice.dice_coef]
            )

In [14]:
# Batch generator over the reorganized label table `df`, using the image size
# and batch size set in the configuration cell.
train_gen = CloudsImageReader(width,height,batch_size,df)

In [None]:
# Resume from the weights written by an earlier run.
# NOTE(review): hard-coded checkpoint name - update it when resuming from a
# different run.
model.load_weights(models_dir + "20190903:00:11:38-weights.hdf5")

history = model.fit_generator(train_gen,
                    # Ask the generator for its own length rather than
                    # repeating the formula here.
                    steps_per_epoch=len(train_gen),
                    epochs=600,
                    callbacks=callbacks_list,
                    verbose=1,
                    # Each queued batch holds fully decoded image and mask
                    # arrays (on the order of tens of megabytes), so the
                    # previous limit of 1000 allowed many gigabytes to pile up.
                    # The recorded run died while forking a worker process,
                    # which is consistent with memory exhaustion (assumption -
                    # the traceback is truncated). 10 is the Keras default and
                    # is enough to keep the workers ahead of the training loop.
                    max_queue_size=10,
                    #initial_epoch=100,
                    use_multiprocessing=True,
                    workers=3,
                    shuffle=True
                   )

Instructions for updating:
Use tf.cast instead.
Epoch 1/600

Epoch 00001: dice_coef improved from -inf to 0.59474, saving model to /home/james/GITHUB/Kaggle/data/Clouds/models/20190904:23:14:24-weights.hdf5
Epoch 2/600

Epoch 00002: dice_coef improved from 0.59474 to 0.59486, saving model to /home/james/GITHUB/Kaggle/data/Clouds/models/20190904:23:14:24-weights.hdf5
Epoch 3/600

Epoch 00003: dice_coef did not improve from 0.59486
Epoch 4/600

Epoch 00004: dice_coef did not improve from 0.59486
Epoch 5/600

Epoch 00005: dice_coef improved from 0.59486 to 0.59548, saving model to /home/james/GITHUB/Kaggle/data/Clouds/models/20190904:23:14:24-weights.hdf5
Epoch 6/600

Epoch 00006: dice_coef improved from 0.59548 to 0.59554, saving model to /home/james/GITHUB/Kaggle/data/Clouds/models/20190904:23:14:24-weights.hdf5
Epoch 7/600

Epoch 00007: dice_coef did not improve from 0.59554
Epoch 8/600

Epoch 00008: dice_coef did not improve from 0.59554
Epoch 9/600

Epoch 00009: dice_coef did not imp

Exception in thread Thread-89:
Traceback (most recent call last):
  File "/home/slow-storage/local/Anaconda3/lib/python3.7/threading.py", line 917, in _bootstrap_inner
    self.run()
  File "/home/slow-storage/local/Anaconda3/lib/python3.7/threading.py", line 865, in run
    self._target(*self._args, **self._kwargs)
  File "/home/slow-storage/local/Anaconda3/lib/python3.7/multiprocessing/pool.py", line 412, in _handle_workers
    pool._maintain_pool()
  File "/home/slow-storage/local/Anaconda3/lib/python3.7/multiprocessing/pool.py", line 248, in _maintain_pool
    self._repopulate_pool()
  File "/home/slow-storage/local/Anaconda3/lib/python3.7/multiprocessing/pool.py", line 241, in _repopulate_pool
    w.start()
  File "/home/slow-storage/local/Anaconda3/lib/python3.7/multiprocessing/process.py", line 112, in start
    self._popen = self._Popen(self)
  File "/home/slow-storage/local/Anaconda3/lib/python3.7/multiprocessing/context.py", line 277, in _Popen
    return Popen(process_obj)
 

