In [None]:
from matplotlib import pyplot as plt
import matplotlib
%matplotlib inline
import cv2
import pandas as pd
import numpy as np
import os

%reload_ext autoreload
%autoreload 2

from sklearn.model_selection import train_test_split
import keras
from keras import backend as K
import gc
from keras.optimizers import Adam, RMSprop

# from utils import predict_test, noise_cv, evaluate, ensemble, get_model, ThreadsafeIter
# from models import *
# # from iterators import DataGenerator
# from generators import DataGenerator

In [None]:
# # Validation and train preprocessing
# noise_cv()
# df_train = pd.read_csv(os.path.join(DATA_ROOT,'train.csv'),index_col='id')
# depths = pd.read_csv(os.path.join(DATA_ROOT,'depths.csv'),index_col='id')
# folds = pd.read_csv(os.path.join(DATA_ROOT,'folds.csv'),index_col='id')
# df_train.join(depths).join(folds).sample(frac=1,random_state=123).to_csv(os.path.join(DATA_ROOT,'train_proc.csv'))

# dist = []
# for id in train.id.values:
#     img = cv2.imread('train/images/{}.png'.format(id), cv2.IMREAD_GRAYSCALE)
#     dist.append(np.unique(img).shape[0])
# train['unique_pixels'] = dist
# train.to_csv('data/train_proc_v2.csv',index=False)

In [None]:
# Directory the pre-processed training CSV is read from.
DATA_ROOT = 'data/'
# Mini-batch size and epoch budget used by the legacy fold loop further down.
BATCH_SIZE = 16
EPOCHS = 300
# Root directory under which each model gets its own sub-directory.
BASE_PATH = '/home/branding_images/salt/'
MODEL_NAME = 'resnet_50_224'
# NOTE: MODEL_PATH is re-assigned inside the args-driven training loop below.
MODEL_PATH = os.path.join(BASE_PATH,MODEL_NAME)

In [None]:
# Load the pre-processed training table and the submission template.
# Both paths are built from DATA_ROOT so the data location is configured in one place.
train = pd.read_csv(os.path.join(DATA_ROOT, 'train_proc_v2.csv'))
test = pd.read_csv(os.path.join(DATA_ROOT, 'sample_submission.csv'))

In [None]:
from params import args
from models.models import get_model
from utils import freeze_model
from datasets.salt import SaltDataset
from augmentations import get_augmentations
from callbacks import get_callback
from utils import ThreadsafeIter
from losses import *

In [None]:
from models import unets
unets.resnet50_fpn((96,96,3),1).summary()

In [None]:
# Train one model per fold listed in `args.fold` (comma-separated fold indices).
folds = [int(f) for f in args.fold.split(',')]
for fold in folds:
    # Drop any graph left over from a previous fold or an earlier run of this cell.
    K.clear_session()

    print('***************************** FOLD {} *****************************'.format(fold))

    model = get_model(args.network, (args.input_size, args.input_size, 3))
    # model.summary()

    MODEL_PATH = os.path.join(args.models_dir,args.network)
    # Fold 0 marks the start of a fresh run: refuse to overwrite an existing model directory.
    if fold == 0 and os.path.isdir(MODEL_PATH):
        raise ValueError('Such Model already exists')
    # Create the directory from Python instead of shelling out to `mkdir`; this also
    # covers runs whose fold list does not contain 0, which previously never created it.
    if not os.path.isdir(MODEL_PATH):
        os.makedirs(MODEL_PATH)

    if args.weights is None:
        print('No weights passed, training from scratch')
    else:
        # `args.weights` is a template that receives the fold index.
        weights_path = args.weights.format(fold)
        print('Loading weights from {}'.format(weights_path))
        model.load_weights(weights_path, by_name=True)

    # Check with model summary!
    freeze_model(model, args.freeze_till_layer)
    # model.summary()

    # RMSprop without decay is the fallback when `args.optimizer` is empty or not recognised.
    optimizer = RMSprop(lr=args.learning_rate)
    if args.optimizer:
        if args.optimizer == 'rmsprop':
            optimizer = RMSprop(lr=args.learning_rate, decay=float(args.decay))
        elif args.optimizer == 'adam':
            optimizer = Adam(lr=args.learning_rate, decay=float(args.decay))

    dataset = SaltDataset(args.images_dir, args.masks_dir, fold, args.n_folds, seed=args.seed)
    augmentations = get_augmentations(args.augmentation_name,p=args.augmentation_prob)

    train_generator = dataset.train_generator((args.input_size, args.input_size),
                                              args.batch_size,
                                              args.preprocessing_function,
                                              augmentations)

    val_generator = dataset.val_generator((args.input_size, args.input_size),
                                          args.batch_size * 2,
                                          args.preprocessing_function)

    best_model_file = os.path.join(MODEL_PATH,'best_{}{}_fold{}.h5'.format(args.alias, args.network, fold))

    callbacks = get_callback(args.callback,
                 weights_path=best_model_file,
                 early_stop_patience=args.early_stop_patience,
                 reduce_lr_factor=args.reduce_lr_factor,
                 reduce_lr_patience=args.reduce_lr_patience,
                 reduce_lr_min=args.reduce_lr_min)

    model.compile(optimizer=optimizer, loss=make_loss('bce_jacard'),
                      metrics=[dice_coef, jacard_coef])

    # Fit the model with Generators.
    # NOTE(review): `n // batch_size * 2` evaluates as `(n // batch_size) * 2`, i.e. two
    # passes' worth of batches per epoch - confirm this is intended.
    # NOTE(review): the validation generator uses batch size `args.batch_size * 2` while
    # validation_steps divides by `args.batch_size` - confirm the doubled step count is intended.
    # validation_steps is cast to int because np.ceil returns a float.
    history = model.fit_generator(generator=ThreadsafeIter(train_generator),
                    steps_per_epoch=dataset.train_ids.shape[0] // args.batch_size * 2,
                    epochs=args.epochs,
                    callbacks=callbacks,
                    validation_data=ThreadsafeIter(val_generator),
                    validation_steps=int(np.ceil(dataset.val_ids.shape[0] / args.batch_size)),
                    workers=8)

    # Release the graph and the Python-side model before the next fold.
    K.clear_session()
    del model
    gc.collect()

In [None]:
# Legacy per-fold pipeline: train, record the best validation epoch, then write
# out-of-fold and test predictions.
# NOTE(review): this cell calls `DataGenerator`, `predict_test` and a five-value
# `get_model(weights_path, MODEL_NAME)`; the only imports of those names from
# `utils` / `generators` are commented out at the top of the notebook, and the
# `get_model` imported from `models.models` is called with a different signature
# elsewhere - confirm which helpers are in scope before running.
losses = []
dices = []
epochs = []
for fold in range(4,5):
    K.clear_session()
    print('***************************** FOLD {} *****************************'.format(fold))

    if fold == 0:
        if os.path.isdir(MODEL_PATH):
            print('Such Model already exists')
            break
        # Create the model directory from Python instead of shelling out to `mkdir`.
        os.makedirs(MODEL_PATH)

    df_train = train[train.fold != fold].copy().reset_index(drop=True)
    df_valid = train[train.fold == fold].copy().reset_index(drop=True)

    # Training drops rows whose `unique_pixels` is not greater than 1; validation keeps every row.
    ids_train, ids_valid = df_train[df_train.unique_pixels>1].id.values, df_valid.id.values

    print('Training on {} samples'.format(ids_train.shape[0]))
    print('Validating on {} samples'.format(ids_valid.shape[0]))

    # Initialize Model
    weights_path = os.path.join(MODEL_PATH,'fold_{fold}.hdf5'.format(fold=fold))
    model, callbacks, input_size, augs, preprocess = get_model(weights_path, MODEL_NAME)

#     # Fit the model with Iterators
#     training_generator = DataGenerator(ids_train, is_train=True, **params)
#     valid_generator = DataGenerator(ids_valid, is_train=False, **params)
#     # Train model on dataset
#     history = model.fit_generator(generator=training_generator,
#                         validation_data=valid_generator,
#                         epochs=EPOCHS,
#                         callbacks=callbacks,
#                         use_multiprocessing=False,
#                         workers=4)

    dg = DataGenerator(input_size=input_size[0], n_channels=input_size[-1], batch_size = BATCH_SIZE, augs = augs,
                      preprocess = preprocess)
    train_generator = dg.train_batch_generator(ids_train)
    validation_generator = dg.evaluation_batch_generator(ids_valid)

    # Fit the model with Generators.
    # validation_steps is cast to int because np.ceil returns a float.
    history = model.fit_generator(generator=ThreadsafeIter(train_generator),
                    steps_per_epoch=ids_train.shape[0] // BATCH_SIZE * 2,
                    epochs=EPOCHS,
                    callbacks=callbacks,
                    validation_data=ThreadsafeIter(validation_generator),
                    validation_steps=int(np.ceil(ids_valid.shape[0] / BATCH_SIZE)),
                    workers=8)

    # The best epoch is the one with the lowest validation loss.
    best_epoch = np.argmin(history.history['val_loss'])
    best_val_loss = np.min(history.history['val_loss'])
    best_val_dice_coeff = history.history['val_dice_coef'][best_epoch]

    losses.append(best_val_loss)
    dices.append(best_val_dice_coeff)
    epochs.append(best_epoch)
    print(best_val_loss)

    # Reload the weights stored at `weights_path` before predicting
    # (presumably the best checkpoint written by the callbacks - TODO confirm).
    model.load_weights(weights_path)

    # SAVE OOF PREDICTIONS
    dir_path = os.path.join(MODEL_PATH,'oof')
    # The 'oof' directory is shared by all folds, so reuse it when it already exists.
    if not os.path.isdir(dir_path):
        os.makedirs(dir_path)
    pred = predict_test(model=model,
                    preds_path=dir_path,
                    oof=True,
                    ids=ids_valid,
                    batch_size=BATCH_SIZE*4,
                    thr=0.5,
                    TTA='',
                    preprocess=preprocess)

    # SAVE TEST PREDICTIONS
    dir_path = os.path.join(MODEL_PATH,'fold_{}'.format(fold))
    if not os.path.isdir(dir_path):
        os.makedirs(dir_path)
    pred = predict_test(model=model,
                    preds_path=dir_path,
                    oof=False,
                    ids=test.id.values,
                    batch_size=BATCH_SIZE*4,
                    thr=0.5,
                    TTA='',
                    preprocess=preprocess)

    # Run a single fold
    # break

In [None]:
Single Batch
val_loss: 0.2542 - val_dice_coef: 0.8197 - val_jacard_coef: 0.6983
iout: 0.679506
jaccard: 0.714482
    
Double Batch:


In [None]:
dices

In [None]:
np.mean([0.8616627585740737,
 0.8088357706568134,
 0.8030663774281042,
 0.835186310659481,
 0.8300570451529922])

In [None]:
epochs

In [None]:
losses

In [None]:
MODEL_PATH

In [None]:
MODEL_PATH2 = '/home/branding_images/salt/initial_generator/'
# MODEL_PATH2 = '/home/branding_images/salt/unet_128/'
# MODEL_PATH2 = '/home/branding_images/salt/unet_128_dropout_adam/'
# MODEL_PATH2 = '/home/branding_images/salt/unet_128_v2/'

In [None]:
res = evaluate([MODEL_PATH], train[train.fold.isin([0,1,2,3])].id.values, 0.5)

In [None]:
res = evaluate([MODEL_PATH,MODEL_PATH2], train.id.values, 0.5)

In [None]:
print("{} / {} / {}".format(np.round(np.mean(res['iout']),5),np.round(np.mean(res['dice']),5),np.round(np.mean(res['jacard']),5)))

In [None]:
0.80788

In [None]:
train['iout'] = res['iout']
train.groupby('fold').iout.aggregate('mean')

In [None]:
pred = ensemble([MODEL_PATH,MODEL_PATH2],[0,1,2,3,4],test.id.values,0.5)
# pred = ensemble([MODEL_PATH],[0,1,2,3,4],test.id.values,0.5)

In [None]:
pred

In [None]:
pred

In [None]:
test['rle_mask'] = pred
test.to_csv('ens_1_9_80788.csv',index=False)

In [None]:
!python unets.py

In [None]:
def plot2x2Array(image, mask):
    """Draw `image` and `mask` side by side on a single row of two axes.

    Each panel gets a grid overlay and a title ('Image' on the left, 'Mask' on the right).
    """
    _, (image_ax, mask_ax) = plt.subplots(1,2)
    panels = ((image_ax, image, 'Image'), (mask_ax, mask, 'Mask'))
    for axis, picture, caption in panels:
        axis.imshow(picture)
        axis.grid()
        axis.set_title(caption)
    
for i in range(5):
    image, mask = dataset[np.random.randint(0, len(dataset))]
    plot2x2Array(image, mask)

In [None]:
def show_results(idx):
    """Show three 4x4-inch figures for position `idx`.

    In order: the run-length-decoded prediction `pred[idx]` (decoded to 101x101),
    then the image file and the mask file for `ids_valid[idx]`.
    Reads the notebook-level `pred` and `ids_valid`.
    """
    from rle import rle_decode

    def _show(picture):
        # One new figure per picture.
        plt.figure(figsize=(4,4))
        plt.imshow(picture)

    _show(rle_decode(pred[idx],(101,101)))
    for template in ('train/images/{}.png', 'train/masks/{}.png'):
        _show(cv2.imread(template.format(ids_valid[idx])))
    

In [None]:
for i in range(10):
    show_results(i)

# TEST PREDICTION

In [None]:
test = pd.read_csv('data/sample_submission.csv')
test.head()

In [None]:
weights_path = '/home/branding_images/salt/initial_fold_0.hdf5'

In [None]:
#model = get_unet_128(input_shape=(128, 128, 3),num_classes=1)
#model = get_unet_128_kaggle(input_shape=(128, 128, 3),start_neurons=16,num_classes=1)
model.load_weights(weights_path)

In [None]:
pred = predict_test(model=model,
                    preds_path='/home/branding_images/salt/initial_fold_0',
                    ids=test.id.values,
                    batch_size=BATCH_SIZE*4,
                    thr=0.5,
                    TTA='')

In [None]:
# Predict the test set with each fold's weights, one output directory per fold.
# The trailing `break` stops the loop after fold 0.
for fold in range(5):
    weights_path = '/home/branding_images/salt/initial_fold_{}.hdf5'.format(fold)
    model.load_weights(weights_path)

    dir_path = '/home/branding_images/salt/initial_fold_{}'.format(fold)
    # Create the output directory from Python instead of shelling out to `mkdir`;
    # an existing directory is reused.
    if not os.path.isdir(dir_path):
        os.makedirs(dir_path)
    pred = predict_test(model=model,
                    preds_path=dir_path,
                    ids=test.id.values,
                    batch_size=BATCH_SIZE*4,
                    thr=0.5,
                    TTA='')
    break

In [None]:
pred

In [None]:
test['rle_mask'] = pred

In [None]:
np.mean([0.8977854151784638,
 0.8849150832019635,
 0.8765836107451583,
 0.9021209366713898,
 0.8764196126324356])

In [None]:
# Initial_generator (not actually true)
dices
[0.8977854151784638,
 0.8849150832019635,
 0.8765836107451583,
 0.9021209366713898,
 0.8764196126324356]

VALIDATION: 0.762575
+ TTA. HorizontalFlip: 0.77545
TTA. VerticalFlip: 0.770274
Vertical+Horizontal: 0.7754 
LB: 0.776
TTA. LB: 0.777 :(
    
fold 0: 0.785309   
LB: 0.748

fold
0    0.785309
1    0.750373
2    0.777559
3    0.767215
4    0.731465

0    0.791481
1    0.789552
2    0.805302
3    0.795949    
    
epochs
[40, 48, 23, 39, 46]

losses
[0.26385884437664053,
 0.28554327216981656,
 0.29580774510097857,
 0.2410078847427157,
 0.31404423417559096]
    
unet_128:
dices

[0.8928389563972567,
 0.8714483881471169,
 0.875522751475968,
 0.8969466935230207,
 0.8706270683343243]

epochs

[54, 33, 51, 38, 30]

losses

[0.27717545788597175,
 0.30893601562401546,
 0.33097895994580334,
 0.24521447161707696,
 0.2972317286357758]
    
CV: 0.757875
LB: 0.771
    
fold_0: 0.762716
LB: 0.727
    
fold
0    0.762716
1    0.743159
2    0.770284
3    0.758228
4    0.754777
    

adam_do_fold_0:
0.7662962962962964
LB: 0.751

In [None]:
np.mean(dices)

In [None]:
np.mean([0.8928389563972567,
 0.8714483881471169,
 0.875522751475968,
 0.8969466935230207,
 0.8706270683343243])

In [None]:
np.mean(dices1)

In [None]:
np.mean(dices2)

In [None]:
pred

In [None]:
test.head()

In [None]:
test.head(20)

In [None]:
test.to_csv('initial_genenerator_5_folds_887565.csv',index=False)

In [None]:
img.shape

In [None]:
pred[0]

In [None]:
from rle import rle_decode
img = rle_decode(pred[1],(101,101))

In [None]:
plt.imshow(img)

In [None]:
plt.imshow(img)

In [None]:
plt.imshow(cv2.imread('test/images/{}.png'.format(test.id.values[1])))

In [None]:
img = cv2.imread('test/images/{}.png'.format(test.id.values[0]))

In [None]:
UNET d4 augs (0.62)
LINKNET (0.70)
cv2.GaussianBlur to smooth the predicted masks
absolutely black images -- drop in train. Null in test

loss: iou is overprediction. bce+jaccard
we could use 96 or 128 size.
bce+jaccard: 0.15 on validation; iou -- 0.8. LB 0.74

All white and all black images in train and test!

Should I use noi3e's folds? Yes, just dropping constant images during training and imputing during validation

Start with predicting: whether an image has mask at all -- binary classification.
    Then multiply these prior probabilities on mask obtained! Again: classification pipeline.
        
Some 100% incorrect masks!

FIND DUPLICATES OF IMAGES!

Change Kaggle architectures with comments from ods:
Use SpatialDropout2D and decrease dropout rate
Conv2DTranspose works worse than NN upsampling + conv
"You don't need multiprocessing, it will run batches preparation in different processes"
-- use multiprocessing in keras



TRY TO BINARIZE MASK AFTER APPLYING RESIZE
HOW IS MY DICE CALCULATED? IT SHOULD HAVE THRESHOLD

DECREASE LR? Too noisy val. Change to rmsprop. DICE is pretty much stable. But loss is jumping
Ask a question in ODS if I can't manage it myself
Also ask about augmentations for segmentation. Should we use the inverse transform for the predicted masks?

Increase Smoothing Parameter in Dice!

Write Own Cyclic LR? Custom with saving checkpoints!

In [None]:
Try to use my own ReduceLR callback, because it's not clear which weights Keras continues from after decreasing the LR
(I guess from the last epoch, not the best one! So, reinitialize the model. Add some functions to avoid repeating the code.)

In [None]:
import os
from keras.losses import binary_crossentropy
from keras_iterator import ImageDataGenerator
from losses import make_loss, dice_coef_clipped, dice_coef
from models import get_unet_resnet
from random_transform_mask import ImageWithMaskFunction
import keras.backend as K
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.optimizers import Adam
# Expose only GPU 0 to CUDA.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Size the generators load images at (passed as `target_size` to flow_from_directory).
img_height = 1280
img_width = 1918
# Output size handed to ImageWithMaskFunction as `out_size`.
out_height = 1280
out_width = 1918
# Model input size; also used as the crop size when `use_crop` is True.
input_height = 1024
input_width = 1024
use_crop = True
learning_rate = 0.00001
batch_size = 1
# Sample counts, used only to derive steps per epoch / validation steps.
nbr_train_samples = 4576
nbr_validation_samples = 512
# Layers before the layer with this name are frozen by freeze_model(); "ALL" freezes every layer.
freeze_till_layer = "input_1"
nbr_epochs = 30
dataset_dir = '/home/selim/kaggle/datasets/carvana'
# Training and validation masks are read from the same directory.
mask_dir = os.path.join(dataset_dir, "train_masks")
val_mask_dir = os.path.join(dataset_dir, "train_masks")
models_dir = '/home/selim/kaggle/models/carvana/resnet_2'
# Checkpoint filename template passed to ModelCheckpoint; the {epoch}, {val_loss} and
# {val_dice_coef_clipped} placeholders are left unformatted here.
# NOTE: str(input_width) and the formatted learning rate are concatenated with no
# separator, giving "...resnet-refine-10240.000010-...".
best_model_file = models_dir + "/resnet-refine-" + str(input_width) + format(learning_rate, ".6f") + "-{epoch:d}-{val_loss:0.7f}-{val_dice_coef_clipped:0.7f}.h5"
train_data_dir = os.path.join(dataset_dir, 'train_split_2')
val_data_dir = os.path.join(dataset_dir, 'train_val_2')
# Weights loaded into the model before training; set to None to skip loading.
weights = "weights/resnet-on-test-combined-19200.000010-0-0.0037752-99.6908383.h5"
# Loss name passed to make_loss().
loss_function = "boot_hard"
def freeze_model(model, freeze_before_layer):
    """Marks layers of `model` as non-trainable.

    # Arguments
        model: object exposing a `layers` sequence whose items have
            `name` and `trainable` attributes.
        freeze_before_layer: "ALL" to freeze every layer; otherwise the name
            of the first layer that stays trainable. Every layer located
            before it in `model.layers` is frozen.
    # Raises
        ValueError: if no layer is named `freeze_before_layer`. Previously an
            unknown name silently froze every layer except the last one.
    """
    if freeze_before_layer == "ALL":
        for l in model.layers:
            l.trainable = False
        return

    matching = [i for i, l in enumerate(model.layers) if l.name == freeze_before_layer]
    if not matching:
        raise ValueError(
            'No layer named {!r} in the model; cannot decide which layers to freeze'.format(
                freeze_before_layer))
    # Keep the original tie-break: if several layers share the name, the last one wins.
    for l in model.layers[:matching[-1]]:
        l.trainable = False
def preprocess_input_resnet(x, data_format=None):
    """Preprocesses a tensor encoding an image or a batch of images.

    Reverses the channel order ('RGB'->'BGR') and subtracts a fixed mean from
    each channel. Works for a single image (3D) and for a batch (4D); the
    channel axis is chosen from `data_format` and the rank of `x`.

    # Arguments
        x: input Numpy tensor of floats, 3D (one image) or 4D (a batch).
        data_format: data format of the image tensor, 'channels_last' or
            'channels_first'. Defaults to `K.image_data_format()`.
    # Returns
        Preprocessed tensor. It is a channel-reversed view of `x`, and the
        mean subtraction is applied in place, so the memory of the array
        passed in is modified.
    # Raises
        ValueError: if `x` is neither 3D nor 4D.
    """
    if data_format is None:
        data_format = K.image_data_format()
    assert data_format in {'channels_last', 'channels_first'}
    if x.ndim not in (3, 4):
        raise ValueError('Expected a 3D or 4D tensor, got {} dimensions'.format(x.ndim))

    # Channels are the last axis for 'channels_last'; for 'channels_first' they are
    # axis 0 of a single image and axis 1 of a batch.
    if data_format == 'channels_first':
        channel_axis = x.ndim - 3
    else:
        channel_axis = x.ndim - 1

    selector = [slice(None)] * x.ndim
    # 'RGB'->'BGR'
    selector[channel_axis] = slice(None, None, -1)
    x = x[tuple(selector)]
    # Zero-center by mean pixel (B, G, R order after the flip)
    for channel, mean in enumerate((103.939, 116.779, 123.68)):
        selector[channel_axis] = channel
        x[tuple(selector)] -= mean
    return x
def preprocess_input(x):
    """Single-argument adapter around `preprocess_input_resnet`.

    No data format is passed, so `preprocess_input_resnet` falls back to its default.
    """
    preprocessed = preprocess_input_resnet(x)
    return preprocessed
# Build the network, optionally freeze its leading layers and restore weights.
model = get_unet_resnet((input_height, input_width, 3))
freeze_model(model, freeze_till_layer)
if weights is not None:
    model.load_weights(weights)
optimizer = Adam(lr=learning_rate)
model.compile(loss=make_loss(loss_function), optimizer=optimizer, metrics=[dice_coef, binary_crossentropy, dice_coef_clipped])
# Print the summary once, after compilation (it was previously printed twice).
model.summary()

# Crops are only taken when `use_crop` is set; None is passed through otherwise.
crop_size = None
if use_crop:
    crop_size = (input_height, input_width)
mask_function = ImageWithMaskFunction(out_size=(out_height, out_width), crop_size=crop_size, mask_dir=val_mask_dir)
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
)
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    shuffle=True,
    classes=None,
    class_mode='regression',
    output_function=ImageWithMaskFunction(out_size=(out_height, out_width), crop_size=crop_size, mask_dir=mask_dir).mask_pred_train)
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
val_generator = val_datagen.flow_from_directory(
    val_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    shuffle=True,
    classes=None,
    class_mode='regression', output_function=mask_function.mask_pred_val)
# save_best_only=False: a checkpoint is written every epoch; the filename template
# embeds the epoch and validation metrics.
best_model = ModelCheckpoint(best_model_file, monitor='val_loss', verbose=1, save_best_only=False, save_weights_only=True)

# Step counts are integer ceilings of samples / batch_size. The previous
# `n / batch_size + 1` produced a float under Python 3 and one batch too many
# whenever n was an exact multiple of batch_size.
train_steps = (nbr_train_samples + batch_size - 1) // batch_size
validation_steps = (nbr_validation_samples + batch_size - 1) // batch_size

# NOTE(review): EarlyStopping patience (45) exceeds nbr_epochs (30), so early stopping
# cannot trigger with the current settings - confirm this is intended.
model.fit_generator(
    train_generator,
    steps_per_epoch=train_steps,
    epochs=nbr_epochs,
    validation_data=val_generator,
    validation_steps=validation_steps,
    callbacks=[best_model, EarlyStopping(patience=45, verbose=10)], workers=2)