The goal of this notebook is to fit a model heavily to the training data, using InceptionV3. Our custom models weren't fitting that well individually, but produced a good result when ensembled.

In [2]:
import os, glob, bcolz, gc

import numpy as np
import pandas as pd

from tqdm import tqdm
from scipy import ndimage, misc

from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import roc_auc_score

from keras import backend as K
from keras import optimizers
from keras.models import Model, load_model
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, Callback
from keras.preprocessing.image import ImageDataGenerator

from keras.layers.normalization import BatchNormalization
from keras.layers import Dense, Dropout, Flatten, Activation, Input, concatenate, GlobalAveragePooling2D
from keras.layers.convolutional import MaxPooling2D, Convolution2D, AveragePooling2D

from keras.applications.inception_v3 import InceptionV3
from keras.applications.xception import Xception
from keras.applications.inception_v3 import preprocess_input as preprocess_input_incep_xcep

from tensorflow.python.client import device_lib
# List the devices TensorFlow reports; left as a bare expression so the
# notebook displays the returned value as the cell output.
device_lib.list_local_devices()

[name: "/cpu:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 7642482284206310879, name: "/gpu:0"
 device_type: "GPU"
 memory_limit: 11332668621
 locality {
   bus_id: 2
 }
 incarnation: 5456069913068642947
 physical_device_desc: "device: 0, name: Tesla K80, pci bus id: 0000:85:00.0"]

In [12]:
def delete_model(model, clear_session=True):
    """Release this function's reference to ``model`` and free memory.

    ``del`` only unbinds the local parameter name; any reference the caller
    still holds keeps the object alive until it is dropped as well.

    Args:
        model: Object to release (a Keras model in this notebook).
        clear_session: When true, also call ``K.clear_session()`` to clear
            the backend (TF) graph.
    """
    del model
    gc.collect()
    if not clear_session:
        return
    K.clear_session()

def grab_optimizer(opt, lr):
    """Build a Keras optimizer from a short name.

    Args:
        opt: ``'sgd'`` (Nesterov momentum 0.8, decay 1e-6) or ``'adam'``
            (beta_1=0.9, beta_2=0.999, epsilon=1e-08, no decay).
        lr: Learning rate handed to the optimizer.

    Returns:
        The configured optimizer instance.

    Raises:
        ValueError: If ``opt`` is not one of the supported names. Previously
            the function fell through and returned ``None``, which only
            surfaced later as a confusing failure at compile time.
    """
    if opt == 'sgd':
        return optimizers.SGD(lr=lr, decay=1e-6, momentum=0.8, nesterov=True)
    if opt == 'adam':
        return optimizers.Adam(lr=lr, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    raise ValueError("unknown optimizer {!r}; expected 'sgd' or 'adam'".format(opt))
    
def freeze_model(model):
    """Mark every layer of ``model`` as non-trainable.

    The layers are modified in place; the same ``model`` object is returned
    so the call can be used inline.
    """
    for frozen_layer in model.layers:
        setattr(frozen_layer, 'trainable', False)
    return model

def dense_block(units, activation, drop_prob, inputs):
    """Apply Dense -> Activation -> Dropout to ``inputs``.

    Args:
        units: Output width of the Dense layer.
        activation: Activation name passed to the separate ``Activation``
            layer (the Dense layer itself is created with ``activation=None``).
        drop_prob: Rate passed to ``Dropout``.
        inputs: Tensor the block is applied to.

    Returns:
        The output tensor of the Dropout layer.
    """
    projected = Dense(units, activation=None)(inputs)
    activated = Activation(activation)(projected)
    return Dropout(drop_prob)(activated)

def make_incepv3_conv(input_shape):
    """Build a binary classifier on top of a frozen InceptionV3 base.

    The base is loaded with ImageNet weights, without its top classifier and
    with average pooling, and all of its layers are frozen. On top sit a
    1024-unit ReLU dense block (dropout rate 0), batch normalization, and a
    single-unit sigmoid dense block (dropout rate 0).

    Args:
        input_shape: Shape passed to ``InceptionV3`` as ``input_shape``.

    Returns:
        An uncompiled ``Model`` from the base input to the sigmoid output.
    """
    backbone = freeze_model(
        InceptionV3(input_shape=input_shape, weights='imagenet', include_top=False, pooling='avg')
    )
    hidden = dense_block(1024, 'relu', 0, inputs=backbone.layers[-1].output)
    normalized = BatchNormalization()(hidden)
    prediction = dense_block(1, 'sigmoid', 0, inputs=normalized)
    return Model(inputs=backbone.input, outputs=prediction)

class ArocScore(Callback):
    """Keras callback that prints the validation ROC AUC after each epoch.

    Only ``on_epoch_end`` is overridden. The other hooks (train/epoch/batch
    begin and end) are inherited from ``Callback``, whose defaults do
    nothing, so the previous explicit no-op overrides (each carrying a
    mutable ``logs={}`` default) were dropped.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Score the validation set and print its ROC AUC.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metrics dict supplied by Keras (unused).
        """
        # NOTE(review): relies on Keras populating ``self.validation_data``
        # as (inputs, targets, ...) -- confirm for the Keras version in use.
        # Targets are rounded before being handed to roc_auc_score.
        y_true = np.around(self.validation_data[1])
        y_pred = self.model.predict(self.validation_data[0])
        print('val aroc: {}'.format(roc_auc_score(y_true, y_pred)))

In [4]:
# Root data directory, and the sub-directories derived from it.
path = os.path.join('/scratch', 'yns207', 'data_invasive')
test_path, results_path, train_path, valid_path = (
    os.path.join(path, subdir)
    for subdir in ('test', 'results', 'train', 'valid')
)

In [5]:
# Load the training label table and the sample submission table from the
# data root.
train_set, test_set = (
    pd.read_csv(os.path.join(path, csv_name))
    for csv_name in ('train_labels.csv', 'sample_submission.csv')
)

def read_img(img_path, img_shape):
    """Read one image from disk and resize it.

    Args:
        img_path: Path of the image file.
        img_shape: Target size handed to ``misc.imresize``.

    Returns:
        The resized image as returned by ``misc.imresize``.
    """
    # NOTE(review): scipy.misc.imread / imresize are absent from recent SciPy
    # releases -- confirm the SciPy version this notebook is pinned to.
    return misc.imresize(misc.imread(img_path), img_shape)

def read_imgs(img_height, img_width):
    """Load every training image listed in ``train_set['name']``.

    Each name is mapped to ``<path>/train/<name>.jpg`` and read through
    ``read_img`` at size ``(img_height, img_width)``; progress is shown with
    ``tqdm``.

    Returns:
        ``np.array`` built from the list of loaded images, in the order of
        ``train_set['name']``.
    """
    target_shape = (img_height, img_width)
    train_dir = os.path.join(path, 'train')
    images = [
        read_img(os.path.join(train_dir, str(name) + '.jpg'), target_shape)
        for name in tqdm(train_set['name'])
    ]
    return np.array(images)

In [6]:
# Load all training images, resized to 500x500.
train_img = read_imgs(img_height=500, img_width=500)

100%|██████████| 2295/2295 [01:28<00:00, 26.02it/s]


In [7]:
# Target vector: the 'invasive' column of the label table as an array.
train_labels = train_set['invasive'].values

In [8]:
# define callbacks

# File that receives the checkpointed model.
model_file = os.path.join(path, 'invasive_incepv3_aug9_heavy_fit.model')

# Instantiated here, but not included in the callback lists of the
# fit_generator calls visible in this notebook.
aroc_score = ArocScore()

# Both callbacks below watch the *training* loss ('loss'), not 'val_loss'.
model_checkpoint = ModelCheckpoint(
    model_file,
    monitor='loss',
    save_best_only=True,
)

reduce_lr = ReduceLROnPlateau(
    monitor='loss',
    factor=0.1,
    patience=5,
    cooldown=10,
    min_lr=0.00001,
    verbose=1,
)

In [9]:
# Cast the images to float32 and apply the Inception/Xception input
# preprocessing, then hold out 20% of the samples for validation with a
# fixed seed.
train_img = preprocess_input_incep_xcep(train_img.astype(np.float32))
x_train, x_valid, y_train, y_valid = train_test_split(
    train_img,
    train_labels,
    random_state=77,
    test_size=0.20,
)

In [14]:
batch_size = 16
epochs = 10

# Random augmentation applied to the training batches.
train_datagen = ImageDataGenerator(
        rotation_range=30,
        zoom_range=0.2,
        shear_range=0.2,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True)

# No train_datagen.fit(x_train) here: fit() only computes the statistics
# needed by featurewise_center / featurewise_std_normalization /
# zca_whitening, none of which are enabled above. Calling it anyway just
# made an extra copy of the full training array.

In [None]:
# wipe out old model
if os.path.isfile(model_file):
    os.remove(model_file)

model = make_incepv3_conv(x_train[0].shape)
model.compile(loss='binary_crossentropy', optimizer=grab_optimizer('adam', 0.0001))

# Batches needed to see every training sample once. Ceiling division avoids
# the extra step that `len // batch_size + 1` adds whenever the sample count
# is an exact multiple of the batch size.
steps_per_epoch = (len(x_train) + batch_size - 1) // batch_size

# do a little dense training first
# to not wreck weights in conv layer
_ = model.fit_generator(train_datagen.flow(x_train, y_train, batch_size=batch_size, shuffle=True),
                    steps_per_epoch=steps_per_epoch,
                    validation_data=(x_valid,y_valid),
                    epochs=epochs,
                    callbacks=[model_checkpoint, reduce_lr])

# Unfreeze part of the convolutional stack, then recompile so the changed
# trainable flags take effect (this also starts from a fresh optimizer).
conv_layers = [layer for layer in model.layers if isinstance(layer, Convolution2D)]
# NOTE(review): [:-84] selects every conv layer EXCEPT the last 84, i.e. the
# ones closest to the input. If the intent was to fine-tune the last 84 conv
# layers, this should be [-84:] -- confirm before changing.
for layer in conv_layers[:-84]:
    layer.trainable = True
model.compile(loss='binary_crossentropy', optimizer=grab_optimizer('adam', 0.0001))

_ = model.fit_generator(train_datagen.flow(x_train, y_train, batch_size=batch_size, shuffle=True),
                    steps_per_epoch=steps_per_epoch,
                    validation_data=(x_valid,y_valid),
                    epochs=epochs*3,
                    callbacks=[model_checkpoint, reduce_lr])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30

In [None]:
# Continue training the current model for another 2 * epochs.
# steps_per_epoch uses ceiling division so an exact multiple of batch_size
# does not get one extra step per epoch.
_ = model.fit_generator(train_datagen.flow(x_train, y_train, batch_size=batch_size, shuffle=True),
                    steps_per_epoch=(len(x_train) + batch_size - 1) // batch_size,
                    validation_data=(x_valid,y_valid),
                    epochs=epochs*2,
                    callbacks=[model_checkpoint, reduce_lr])

In [None]:
# Train for a further `epochs` epochs, keeping the returned history object.
# steps_per_epoch uses ceiling division so an exact multiple of batch_size
# does not get one extra step per epoch.
hist = model.fit_generator(train_datagen.flow(x_train, y_train, batch_size=batch_size, shuffle=True),
                    steps_per_epoch=(len(x_train) + batch_size - 1) // batch_size,
                    validation_data=(x_valid,y_valid),
                    epochs=epochs,
                    callbacks=[model_checkpoint, reduce_lr])

In [16]:
# Run garbage collection and clear the Keras session. The `del` inside
# delete_model only unbinds its own parameter, so the global name `model`
# still refers to the model object after this call.
delete_model(model)

# summary 

1 - run without dense layers, final results (untrainable conv, no drop), run for 10 epochs:

```
best model train loss: 0.4615103811480121
best model valid loss: 0.4845742255384366
best model train aroc score: 0.9105201204524523, valid aroc score: 0.8945979447398935
```

2 - run w/ one dense layer, final results (untrainable conv, no drop), run for 10 epochs:

```
best model train loss: 0.39631468901706934
best model valid loss: 0.47747906380229527
best model train aroc score: 0.998286203941731, valid aroc score: 0.9416010236314926
```

3 - run w/ two dense layers, final results (untrainable conv, no drop), run for 10 epochs:

important to note - that data was different for this run.

```
best model train loss: 0.36904695169079016
best model valid loss: 0.49238228940755974
best model train aroc score: 0.9998296450853887, valid aroc score: 0.9355432044463993
```

4 - run with two dense layers (5 epochs), with conv layers unlocked up to 84 (31, 62, 84) 10 epochs for each unlock, no drop:

note - comes out to 35 epochs of total training.

```
best model train loss: 0.28450368757081707
best model valid loss: 0.32045090205009724
best model train aroc score: 1.0, valid aroc score: 0.9924227278179857
```

5 - run with 2x 256 dense layers w/ batchnorm outside func declaration.


wow that vastly improved stuff.

```
loss: 0.0683 - val_loss: 1.0505
```

6 - try 1x 1024 dense layers with data aug, etc:

```
made it to 0.10, 10 conv retrainings in, so that's fine.
```

7 - 1x1024 dense layer, with 500x500 images, jacked epochs up to 50.


```
```


In [None]:
# Reload the model saved by the checkpoint callback and report its loss and
# ROC AUC on the training and validation splits.
model = load_model(model_file)

eval_tr = model.evaluate(x_train, y_train)
eval_va = model.evaluate(x_valid, y_valid)

# Column 0 of the predictions is scored against the rounded labels.
tr_score = roc_auc_score(np.around(y_train), model.predict(x_train)[:, 0])
va_score = roc_auc_score(np.around(y_valid), model.predict(x_valid)[:, 0])

report_lines = (
    '\n',
    'best model train loss: {}'.format(eval_tr),
    'best model valid loss: {}'.format(eval_va),
    'best model train aroc score: {}, valid aroc score: {}'.format(tr_score, va_score),
    '\n',
)
for report_line in report_lines:
    print(report_line)