The whole point of this notebook is to train Inception models with 0-0.9 label smoothing and data augmentation, to see if this improves the model at all.

In [3]:
import os, glob, bcolz, gc

import numpy as np
import pandas as pd

from tqdm import tqdm
from scipy import ndimage, misc

from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import MinMaxScaler

from keras import backend as K
from keras import optimizers
from keras.models import Model, load_model
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, Callback
from keras.preprocessing.image import ImageDataGenerator, random_rotation, random_shear, random_zoom, random_shift, flip_axis

from keras.layers.normalization import BatchNormalization
from keras.layers import Dense, Dropout, Flatten, Activation, Input, concatenate, GlobalAveragePooling2D
from keras.layers.convolutional import MaxPooling2D, Convolution2D, AveragePooling2D
from keras.layers.advanced_activations import PReLU, LeakyReLU

from keras.applications.vgg19 import VGG19
from keras.applications.resnet50 import ResNet50
from keras.applications.inception_v3 import InceptionV3
from keras.applications.xception import Xception
from keras.applications.inception_v3 import preprocess_input as preprocess_input_incep_xcep
from keras.applications.imagenet_utils import preprocess_input as preprocess_input_vgg_resnet

import matplotlib.image as mpimg
import matplotlib.pyplot as plt

from tensorflow.python.client import device_lib
# Ask TensorFlow which local compute devices it can see; the cell output follows.
device_lib.list_local_devices()

[name: "/cpu:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 11767644291208995527, name: "/gpu:0"
 device_type: "GPU"
 memory_limit: 384565248
 locality {
   bus_id: 2
 }
 incarnation: 8250489668158734385
 physical_device_desc: "device: 0, name: Tesla K80, pci bus id: 0000:85:00.0"]

In [20]:
def grab_optimizer(opt, lr):
    """Build a Keras optimizer from a short name.

    Args:
        opt: 'sgd' (decay 1e-6, momentum 0.8, Nesterov) or 'adam'
            (beta_1=0.9, beta_2=0.999, epsilon=1e-08, no decay).
        lr: learning rate handed to the optimizer.

    Returns:
        The configured ``optimizers.SGD`` or ``optimizers.Adam`` instance.

    Raises:
        ValueError: if ``opt`` is not one of the supported names.
    """
    if opt == 'sgd':
        return optimizers.SGD(lr=lr, decay=1e-6, momentum=0.8, nesterov=True)
    if opt == 'adam':
        return optimizers.Adam(lr=lr, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    # Fail loudly here: falling through used to return None, which only
    # surfaced later as a confusing error when the model was compiled.
    raise ValueError("unknown optimizer {!r}; expected 'sgd' or 'adam'".format(opt))

def inception_block(filter_depth, inputs):
    """Build one inception-style block and return its concatenated output.

    A 1x1 convolution of ``inputs`` feeds both a 3x3 and a 5x5 branch; a third
    branch average-pools ``inputs`` (3x3 window, stride 1) and applies a 1x1
    convolution. Every convolution has ``filter_depth`` filters, 'same'
    padding and no bias, and is followed by batch normalisation and ReLU.
    The three branch outputs are concatenated along axis 3
    (presumably the channel axis, i.e. channels-last data - confirm).
    """
    def conv_bn_relu(tensor, kernel):
        # Bias-free convolution -> batch norm -> ReLU.
        tensor = Convolution2D(filter_depth, kernel, padding='same', activation=None, use_bias=False)(tensor)
        tensor = BatchNormalization()(tensor)
        return Activation('relu')(tensor)

    # Shared 1x1 reduction used by the two spatial branches.
    bottleneck = conv_bn_relu(inputs, (1, 1))
    branch_3x3 = conv_bn_relu(bottleneck, (3, 3))
    branch_5x5 = conv_bn_relu(bottleneck, (5, 5))

    # Pooling branch works on the raw block input, not on the bottleneck.
    pooled = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(inputs)
    branch_pool = conv_bn_relu(pooled, (1, 1))

    return concatenate([branch_3x3, branch_5x5, branch_pool], axis=3)

def dense_block(units, activation, drop_prob, inputs):
    """Stack Dense -> BatchNormalization -> Activation -> Dropout on ``inputs``.

    The Dense layer has ``units`` outputs and no activation of its own;
    ``activation`` is applied after batch normalisation, and dropout with
    rate ``drop_prob`` comes last. Returns the resulting tensor.
    """
    dense_out = Dense(units, activation=None)(inputs)
    normed = BatchNormalization()(dense_out)
    activated = Activation(activation)(normed)
    return Dropout(drop_prob)(activated)

def make_incep(input_shape, optimizer):
    """Assemble and compile the custom inception-style binary classifier.

    Args:
        input_shape: shape of one input sample, passed to ``Input``.
        optimizer: Keras optimizer used when compiling the model.

    Returns:
        A ``Model`` compiled with binary cross-entropy loss and the
        accuracy metric.
    """
    inputs = Input(shape=input_shape)

    # Stem: normalise the raw input, then a strided conv and max-pool.
    net = BatchNormalization()(inputs)
    net = Convolution2D(16, (3, 3), strides=(2,2), padding='valid', activation='relu')(net)
    net = MaxPooling2D((3, 3), strides=(2, 2))(net)

    # Three inception blocks of increasing filter depth.
    net = BatchNormalization()(net)
    for depth in (32, 64, 128):
        net = inception_block(depth, net)

    # Classifier head: global pooling, two wide dense blocks, sigmoid output.
    net = GlobalAveragePooling2D()(net)
    net = BatchNormalization()(net)
    for drop_prob in (0.25, 0.5):
        net = dense_block(1024, 'relu', drop_prob, inputs=net)
    outputs = dense_block(1, 'sigmoid', 0, inputs=net)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

class ArocScore(Callback):
    """Keras callback that prints the validation ROC AUC after every epoch.

    The score is computed from ``self.validation_data`` (element 0 is used as
    the model input and element 1 as the targets). Targets are rounded with
    ``np.around`` before scoring so that smoothed labels become hard labels
    that ``roc_auc_score`` accepts. All other hooks are intentional no-ops.

    ``logs`` defaults to ``None`` rather than a shared mutable ``{}``.
    """

    def on_train_begin(self, logs=None):
        return

    def on_train_end(self, logs=None):
        return

    def on_epoch_begin(self, epoch, logs=None):
        return

    def on_epoch_end(self, epoch, logs=None):
        """Predict on the validation data and print its ROC AUC."""
        val_inputs = self.validation_data[0]
        val_targets = self.validation_data[1]
        y_pred = self.model.predict(val_inputs).flatten()
        # Round smoothed targets back to hard labels for the AUC computation.
        y_true = np.around(val_targets).flatten()
        print('val aroc: {}'.format(roc_auc_score(y_true, y_pred)))

    def on_batch_begin(self, batch, logs=None):
        return

    def on_batch_end(self, batch, logs=None):
        return

# Trains on the images directly (no precomputed features) so that data
# augmentation can be applied on the fly.
def train_kfolds_smoothed(model, train_data, train_label, gen, model_out, model_init_weights, epochs, kfolds, batch_size):
    """Train one model per K-fold split with augmentation and report each fold.

    For every fold the model is reloaded from ``model_init_weights``, trained
    on augmented batches from ``gen``, and the checkpoint with the best
    ``val_loss`` is reloaded for evaluation. Labels are rounded with
    ``np.around`` before computing ROC AUC so smoothed labels can be scored.

    Args:
        model: unused; a fresh model is loaded from ``model_init_weights`` for
            every fold. Kept so existing callers keep working.
        train_data: array of training inputs, indexed by the fold indices.
        train_label: array of (possibly smoothed) labels aligned with
            ``train_data``.
        gen: generator object whose ``flow(x, y, batch_size=...)`` yields
            augmented batches (an ``ImageDataGenerator`` in this notebook).
        model_out: prefix for the per-fold checkpoint files
            (``<model_out>_<fold>.model``) and for the history file.
        model_init_weights: path of the saved model reloaded at the start of
            every fold.
        epochs: number of epochs per fold.
        kfolds: number of folds.
        batch_size: batch size used for the generator and for the
            steps-per-epoch computation.

    Returns:
        dict mapping each fold's checkpoint file name to
        ``{'score_tr_va': [train_auc, valid_auc], 'train_loss': [...],
        'val_loss': [...]}``.

    Side effects:
        Writes checkpoint files, prints a summary per fold, and appends the
        same summary to ``<results_path>/<model_out>_history.out``
        (``results_path`` is read from module scope).
    """
    kf = KFold(n_splits=kfolds, shuffle=True)
    history_file = os.path.join(results_path, '{}_{}.out'.format(model_out, 'history'))

    models_stats = {}
    for i, (train_ixs, valid_ixs) in enumerate(kf.split(train_data)):
        x_train = train_data[train_ixs]
        x_valid = train_data[valid_ixs]
        y_train = train_label[train_ixs]
        y_valid = train_label[valid_ixs]

        # Re-initialise the model on each fold by loading the stored
        # initial model from file.
        model = load_model(model_init_weights)
        model_out_file = '{}_{}.model'.format(model_out, str(i))
        model_checkpoint = ModelCheckpoint(model_out_file,
                                           monitor='val_loss',
                                           save_best_only=True)

        aroc_score = ArocScore()

        reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                      patience=5,
                                      verbose=1,
                                      factor=0.1,
                                      cooldown=10,
                                      min_lr=0.00001)

        # Ceiling division: exactly one pass over the fold per epoch, with no
        # extra step when the fold size is a multiple of batch_size.
        steps_per_epoch = (len(x_train) + batch_size - 1) // batch_size

        # Pass batch_size through so the generator's batches match the
        # step count (flow() would otherwise use its own default).
        _ = model.fit_generator(gen.flow(x_train, y_train, batch_size=batch_size),
                                steps_per_epoch=steps_per_epoch,
                                epochs=epochs,
                                validation_data=(x_valid, y_valid),
                                callbacks=[aroc_score, model_checkpoint, reduce_lr])

        # Evaluate the best checkpoint, not the last-epoch weights.
        model = load_model(model_out_file)

        eval_tr = model.evaluate(x_train, y_train)
        eval_va = model.evaluate(x_valid, y_valid)

        tr_score = roc_auc_score(np.around(y_train), model.predict(x_train)[:, 0])
        va_score = roc_auc_score(np.around(y_valid), model.predict(x_valid)[:, 0])

        summary_lines = [
            'kfold: {}'.format(str(i)),
            'best model train loss: {}'.format(eval_tr),
            'best model valid loss: {}'.format(eval_va),
            'best model train aroc score: {}, valid aroc score: {}'.format(tr_score, va_score),
        ]

        print('\n')
        for line in summary_lines:
            print(line)
        print('\n')
        models_stats[model_out_file] = {'score_tr_va':[tr_score, va_score], 'train_loss':[eval_tr], 'val_loss':[eval_va]}

        # One field per line; previously the fields were written back to back
        # with no separator, which made the history file hard to read.
        with open(history_file, 'a') as f:
            f.write('\n'.join(summary_lines))
            f.write('\n')

    return models_stats

In [5]:
# Root of the dataset; every other location is a sub-directory of it.
path = os.path.join('/scratch', 'yns207', 'data_invasive')
test_path, results_path, train_path, valid_path = (
    os.path.join(path, sub_dir)
    for sub_dir in ('test', 'results', 'train', 'valid')
)

In [6]:
# Load the training label table and the sample-submission table (the latter
# supplies the test image names) from the dataset root.
train_set, test_set = (
    pd.read_csv(os.path.join(path, csv_name))
    for csv_name in ('train_labels.csv', 'sample_submission.csv')
)

def read_img(img_path, img_shape):
    """Read the image at ``img_path`` and return it resized to ``img_shape``.

    Uses ``misc.imread`` followed by ``misc.imresize``.
    NOTE(review): both helpers were dropped from later SciPy releases -
    confirm the pinned SciPy version still provides them.
    """
    return misc.imresize(misc.imread(img_path), img_shape)

def read_imgs(img_height, img_width):
    """Load every train and test image, resized to (img_height, img_width).

    Image names come from the 'name' column of the module-level ``train_set``
    and ``test_set`` tables; files are read from ``<path>/train/<name>.jpg``
    and ``<path>/test/<name>.jpg`` with a progress bar for each split.

    Returns:
        Tuple ``(train_images, test_images)`` of NumPy arrays.
    """
    target_shape = (img_height, img_width)

    def load_split(names, split_dir):
        # Read one split's images in table order.
        return [
            read_img(os.path.join(path, split_dir, str(name)+'.jpg'), target_shape)
            for name in tqdm(names)
        ]

    train_img = load_split(train_set['name'].iloc[:], 'train')
    test_img = load_split(test_set['name'].iloc[:], 'test')
    return np.array(train_img), np.array(test_img)

# Load both splits at 200 (height) x 300 (width).
train_img, test_img = read_imgs(img_height=200, img_width=300)

100%|██████████| 2295/2295 [01:13<00:00, 31.27it/s]
100%|██████████| 1531/1531 [00:47<00:00, 32.08it/s]


In [7]:
# Labels from the 'invasive' column, copied into a plain NumPy array.
train_labels = np.array(train_set['invasive'])

# invasive_customincep_data_aug_aug8 200x300, 0-0.9 smooth

In [21]:
%cd $path
batch_size = 32
epochs = 35
kfolds = 5
lr = 0.001

model_name = 'invasive_customincep_data_aug_aug8'
init_weights_model = '{}_base.model'.format(model_name)

model = make_incep(train_img[0].shape, grab_optimizer('adam', lr))
#recmompile the model without accuracy (since it doestn report w/ smoothed labels)
model.compile(loss='binary_crossentropy', optimizer=grab_optimizer('adam', lr))
model.save(init_weights_model)

proc_train_img = preprocess_input_incep_xcep(train_img.astype(np.float32))
smoothed_train_labels = MinMaxScaler(feature_range=(0,0.9)).fit_transform(train_labels.reshape(-1, 1)).flatten()

gen = ImageDataGenerator(
    rotation_range = 30,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    shear_range = 0.2,
    zoom_range = 0.2,
    horizontal_flip = True,
    vertical_flip = True,
    fill_mode = 'nearest')

# train dense model on folds
performance = train_kfolds_smoothed(model, proc_train_img, smoothed_train_labels, gen, model_name, init_weights_model, epochs, kfolds, batch_size)

/scratch/yns207/data_invasive




Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 25/35
Epoch 26/35
Epoch 27/35

Epoch 00026: reducing learning rate to 0.00010000000474974513.
Epoch 28/35
Epoch 29/35
Epoch 30/35
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35


kfold: 0
best model train loss: 0.2687278964550667
best model valid loss: 0.3152203850107255
best model train aroc score: 0.9990337611482987, valid aroc score: 0.986862124264978


Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 25/35
Epoch 26/35
Epoch 27/35
Epoch 28/35
Epoch 29/35
Epoc

In [22]:
# Display the per-fold statistics returned by train_kfolds_smoothed.
performance

{'invasive_customincep_data_aug_aug8_0.model': {'score_tr_va': [0.99903376114829867,
   0.98686212426497799],
  'train_loss': [0.26872789645506667],
  'val_loss': [0.31522038501072552]},
 'invasive_customincep_data_aug_aug8_1.model': {'score_tr_va': [0.99738130537341863,
   0.9915935523362579],
  'train_loss': [0.27063731125237378],
  'val_loss': [0.30116128155348865]},
 'invasive_customincep_data_aug_aug8_2.model': {'score_tr_va': [0.9974768221927901,
   0.9926002480363787],
  'train_loss': [0.28165793951300494],
  'val_loss': [0.31525057382183658]},
 'invasive_customincep_data_aug_aug8_3.model': {'score_tr_va': [0.997469012697205,
   0.99600143948178654],
  'train_loss': [0.28272296822668419],
  'val_loss': [0.28071793465832479]},
 'invasive_customincep_data_aug_aug8_4.model': {'score_tr_va': [0.99853818870492694,
   0.99051496030938324],
  'train_loss': [0.2706827030462377],
  'val_loss': [0.311018905316303]}}

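It kind of looks like this is just increasing the loss. Note, though, that the loss here is computed against the smoothed labels: for a target of 0.9, binary cross-entropy cannot drop below about 0.325, so these values are not directly comparable with losses from runs trained on hard 0/1 labels.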