In [1]:
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/cpu:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 17548800455135525783, name: "/gpu:0"
 device_type: "GPU"
 memory_limit: 11332668621
 locality {
   bus_id: 2
 }
 incarnation: 8376969518538152755
 physical_device_desc: "device: 0, name: Tesla K80, pci bus id: 0000:85:00.0"]

In [2]:
import os, glob, bcolz

import numpy as np
import pandas as pd

from tqdm import tqdm
from scipy import ndimage, misc

from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import roc_auc_score

from keras import backend as K
from keras import optimizers
from keras.models import Model
from keras.callbacks import ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator

from keras.layers.normalization import BatchNormalization
from keras.layers import Dense, Dropout, Flatten, Activation, Input
from keras.layers.convolutional import MaxPooling2D, Convolution2D
from keras.layers.advanced_activations import PReLU, LeakyReLU

from keras.applications.vgg19 import VGG19
from keras.applications.resnet50 import ResNet50
from keras.applications.inception_v3 import InceptionV3
from keras.applications.xception import Xception
from keras.applications.inception_v3 import preprocess_input as preprocess_input_incep_xcep
from keras.applications.imagenet_utils import preprocess_input as preprocess_input_vgg_resnet

import matplotlib.image as mpimg
import matplotlib.pyplot as plt

Using TensorFlow backend.


In [3]:
def read_img(img_path, img_shape):
    img = misc.imread(img_path)
    img = misc.imresize(img, img_shape)
    return img

def read_imgs(img_height, img_width):
    train_img, test_img = [],[]
    for img_path in tqdm(train_set['name'].iloc[:]):
        train_img.append(read_img(os.path.join(path, 'train', str(img_path)+'.jpg'), (img_height, img_width)))

    for img_path in tqdm(test_set['name'].iloc[:]):
        test_img.append(read_img(os.path.join(path, 'test', str(img_path)+'.jpg'), (img_height, img_width)))
    return np.array(train_img), np.array(test_img)

def save_array(fname, arr):
    c=bcolz.carray(arr, rootdir=fname, mode='w')
    c.flush()

def load_array(fname):
    return bcolz.open(fname)[:]

def freeze_model(model):
    for layer in model.layers:
        layer.trainable = False
    return model

def grab_optimizer(opt, lr):
    if opt == 'sgd':
        return optimizers.SGD(lr=lr, decay=1e-6, momentum=0.8, nesterov=True)
    elif opt == 'adam':
        return optimizers.Adam(lr=lr)
    elif opt == 'adagrad':
        return optimizers.Adagrad(lr=lr)
    elif opt == 'rmsprop':
        return optimizers.RMSprop(lr=lr)
    
def dense_block(units, activation, drop_prob, inputs):
    x = Dense(units, activation=None)(inputs)
    x = BatchNormalization()(x)
    x = Activation(activation)(x)
    x = Dropout(drop_prob)(x)
    return x

def make_conv_model(input_shape, optimizer):
    inputs = Input(shape=input_shape)
    m = conv_block(16, (3,3), (2,2),'relu', inputs=inputs)
    m = conv_block(32, (3,3), (2,2), 'relu', inputs=m)
    m = conv_block(64, (3,3), (2,2), 'relu', inputs=m)
    m = conv_block(128, (3,3), (2,2), 'relu', inputs=m)
    m = conv_block(256, (3,3), (2,2), 'relu', inputs=m)
    m = Flatten()(m)
    m = dense_block(2048, 'relu', 0.25, inputs=m)
    m = dense_block(512, 'relu', 0.5, inputs=m)
    outputs = dense_block(1, 'sigmoid', 0, inputs=m)
    
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

def make_vgg19_ft(input_shape, optimizer):
    base_model = VGG19(input_shape=input_shape, weights='imagenet', include_top=False)
    base_model = freeze_model(base_model)
    m = Flatten()(base_model.layers[-1].output)
    m = dense_block(1024, 'relu', 0.25, inputs=m)
    m = dense_block(1024, 'relu', 0.5, inputs=m)
    outputs = dense_block(1, 'sigmoid', 0, inputs=m)
    
    model = Model(inputs=base_model.input, outputs=outputs)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

def make_incepv3_ft(input_shape, optimizer):
    base_model = InceptionV3(input_shape=input_shape, weights='imagenet', include_top=False)
    base_model = freeze_model(base_model)
    m = Flatten()(base_model.layers[-1].output)
    m = dense_block(1024, 'relu', 0.25, inputs=m)
    m = dense_block(1024, 'relu', 0.5, inputs=m)
    outputs = dense_block(1, 'sigmoid', 0, inputs=m)
    
    model = Model(inputs=base_model.input, outputs=outputs)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

def make_resnet50_ft(input_shape, optimizer):
    base_model = ResNet50(input_shape=input_shape, weights='imagenet', include_top=False)
    base_model = freeze_model(base_model)
    m = Flatten()(base_model.layers[-1].output)
    m = dense_block(1024, 'relu', 0.25, inputs=m)
    m = dense_block(1024, 'relu', 0.5, inputs=m)
    outputs = dense_block(1, 'sigmoid', 0, inputs=m)
    
    model = Model(inputs=base_model.input, outputs=outputs)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

def make_xception_ft(input_shape, optimizer):
    base_model = Xception(input_shape=input_shape, weights='imagenet', include_top=False)
    base_model = freeze_model(base_model)
    m = Flatten()(base_model.layers[-1].output)
    m = dense_block(1024, 'relu', 0.25, inputs=m)
    m = dense_block(1024, 'relu', 0.5, inputs=m)
    outputs = dense_block(1, 'sigmoid', 0, inputs=m)
    
    model = Model(inputs=base_model.input, outputs=outputs)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

# does not use precomputiation so it can use data augmentation
def train_kfolds_ft_conv(model, train_data, train_label, gen_train, gen_valid, model_out, model_init_weights, epochs, kfolds, batch_size):
    kf = KFold(n_splits=kfolds, shuffle=True)
    
    i = 0
    models_stats = {}
    for train_ixs, valid_ixs in kf.split(train_data):
        x_train = train_data[train_ixs]
        x_valid = train_data[valid_ixs]
        y_train = train_label[train_ixs]
        y_valid = train_label[valid_ixs]
                
        #re-initialzie the weights of the model on each run
        #by loading thi intiial stored weights from file
        model.load_weights(model_init_weights)
        model_out_file = '{}_{}.model'.format(model_out, str(i))
        model_checkpoint = ModelCheckpoint(model_out_file, 
                                            monitor='val_loss', 
                                            save_best_only=True)
        
        # fit the dense layers for 1/4 the epochs
        model.fit_generator(gen_train.flow(x_train, y_train, batch_size=batch_size),
                            steps_per_epoch=(len(x_train)//batch_size)+1,
                            validation_data=gen_valid.flow(x_valid, y_valid, batch_size=batch_size),
                            validation_steps=(len(x_valid)//batch_size)+1,
                            epochs=epochs//4,
                            verbose=1,
                            callbacks=[model_checkpoint])
        print('training last 3 conv layers:')
        # set last conv layer to trainable and fit for 3/4 the epochs
        conv_layers = [layer for layer in model.layers if type(layer) is Convolution2D]
        for layer in conv_layers[-3:]:
            layer.trainable = True

        model.fit_generator(gen_train.flow(x_train, y_train, batch_size=batch_size),
                            steps_per_epoch=(len(x_train)//batch_size)+1,
                            validation_data=gen_valid.flow(x_valid, y_valid, batch_size=batch_size),
                            validation_steps=(len(x_valid)//batch_size)+1,
                            epochs=epochs-(epochs//4),
                            verbose=1,
                            callbacks=[model_checkpoint])
        
        model.load_weights(model_out_file)
        
        eval_tr = model.evaluate(x_train, y_train)
        eval_va = model.evaluate(x_valid, y_valid)
        
        tr_score = roc_auc_score(y_train, model.predict(x_train)[:, 0])
        va_score = roc_auc_score(y_valid, model.predict(x_valid)[:, 0])
        
        print('\n')
        print('kfold: {}'.format(str(i)))
        print('best model train acc: {}, loss: {}'.format(eval_tr[1], eval_tr[0]))
        print('best model valid acc: {}, loss: {}'.format(eval_va[1], eval_va[0]))
        print('best model train aroc score: {}, valid aroc score: {}'.format(tr_score, va_score))
        print('\n')
        models_stats[model_out_file] = {'score_tr_va':[tr_score, va_score], 'train_acc_loss':[eval_tr[1], eval_tr[0]], 'val_acc_loss':[eval_va[1], eval_va[0]]}
        
        with open(os.path.join(models_path,'{}_{}.out'.format(model_out,'history')), 'a') as f:
            f.write('kfold: {}'.format(str(i)))
            f.write('best model train acc: {}, loss: {}'.format(eval_tr[1], eval_tr[0]))
            f.write('best model valid acc: {}, loss: {}'.format(eval_va[1], eval_va[0]))
            f.write('best model train aroc score: {}, valid aroc score: {}'.format(tr_score, va_score))
            f.write('\n')
        
        i += 1
    
    return models_stats

setup data dirs and read in imgs:

In [4]:
DATA_DIR = os.path.join('/scratch', 'yns207', 'data_invasive')

path = DATA_DIR
test_path = os.path.join(path, 'test')
models_path = os.path.join(path, 'results')
train_path = os.path.join(path, 'train')
valid_path = os.path.join(path, 'valid')
print('DATA_PATH: ' + path)

DATA_PATH: /scratch/yns207/data_invasive


# loading data

In [None]:
train_set = pd.read_csv(os.path.join(path, 'train_labels.csv'))
test_set = pd.read_csv(os.path.join(path, 'sample_submission.csv'))

In [None]:
train_img, test_img = read_imgs(300,400)
train_label = np.array(train_set['invasive'].iloc[:])

create a holdout set of 10%:

In [None]:
train_img, hold_img, train_labels, hold_labels = train_test_split(train_img, train_label, test_size=0.10)

In [None]:
train_img.shape, hold_img.shape, train_labels.shape, hold_labels.shape

save the datasets unaltered so they can be loaded again at a later point:

In [None]:
%cd $path
save_array('aug_3_train_img.dat', train_img)
save_array('aug_3_hold_img.dat', hold_img)
save_array('aug_3_train_labels.dat', train_labels)
save_array('aug_3_hold_labels.dat', hold_labels)

# checkpoint

read the datasets with bcolz:

In [5]:
%cd $path
train_img = load_array('aug_3_train_img.dat')
hold_img = load_array('aug_3_hold_img.dat')
train_labels = load_array('aug_3_train_labels.dat')
hold_labels = load_array('aug_3_hold_labels.dat')

/scratch/yns207/data_invasive


In [6]:
train_img.shape, hold_img.shape, train_labels.shape, hold_labels.shape

((2065, 300, 400, 3), (230, 300, 400, 3), (2065,), (230,))

In [7]:
batch_size = 32
epochs = 100
kfolds = 5
lr = 0.00025

In [None]:
# get it into right folder
%cd $models_path
model_name = 'invasive_vgg19_conv_aug3'
init_weights_model = '{}_base.model'.format(model_name)

# create model and save initial weights
model = make_vgg19_ft(train_img[0].shape, grab_optimizer('adam', lr))
model.save_weights(init_weights_model)

#make generators
gen_train = ImageDataGenerator(
    rotation_range = 30,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    shear_range = 0.2,
    zoom_range = 0.2,
    horizontal_flip = True,
    vertical_flip = True,
    fill_mode = 'nearest')

gen_valid = ImageDataGenerator()

#demean data coming through generators
gen_train.mean = np.array([103.939, 116.779, 123.68],dtype=np.float32).reshape(1,1,3)
gen_valid.mean = np.array([103.939, 116.779, 123.68],dtype=np.float32).reshape(1,1,3)

# train dense model on folds
performance1 = train_kfolds_ft_conv(model, train_img, train_labels, gen_train, gen_valid, model_name, init_weights_model, epochs, kfolds, batch_size)

/scratch/yns207/data_invasive/results
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
training last 3 conv layers:
Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75

In [18]:
performance1

{'invasive_vgg19_conv_aug3_0.model': {'score_tr_va': [0.99760943938978952,
   0.98428390367553864],
  'train_acc_loss': [0.97699757869249393, 0.13639422866896914],
  'val_acc_loss': [0.95157384858004401, 0.18544544997573187]},
 'invasive_vgg19_conv_aug3_1.model': {'score_tr_va': [0.99790882873798836,
   0.98729582577132491],
  'train_acc_loss': [0.97639225181598066, 0.15110584299731775],
  'val_acc_loss': [0.96368038481141982, 0.18682712287648826]},
 'invasive_vgg19_conv_aug3_2.model': {'score_tr_va': [0.99755589925722243,
   0.99141620222929949],
  'train_acc_loss': [0.97820823244552058, 0.14574118027098243],
  'val_acc_loss': [0.96125907860425719, 0.18444043816434846]},
 'invasive_vgg19_conv_aug3_3.model': {'score_tr_va': [0.9975355138769525,
   0.99542222904199273],
  'train_acc_loss': [0.97881355932203384, 0.14567435884591165],
  'val_acc_loss': [0.96852300270994984, 0.15886767728132428]},
 'invasive_vgg19_conv_aug3_4.model': {'score_tr_va': [0.99792861695347357,
   0.9862840967575

# summary

so above i opted for data generation instead of preprocessing... lets try the preprocessing here instead.

In [9]:
model_name_new = 'invasive_vgg19_conv10p_aug3'
model_name = 'invasive_vgg19_conv_aug3'

x_hold = preprocess_input_vgg_resnet(hold_img.astype(np.float32))
x_train = preprocess_input_vgg_resnet(train_img.astype(np.float32))

# for the right folds load the right models
# 0 ,1 ,2 ,3 ,4
for i in [2, 3]:
    model_out_file = '{}_{}.model'.format(model_name_new, i)
    model_weights = '{}_{}.model'.format(model_name, i)
    model = make_vgg19_ft(hold_img[0].shape, grab_optimizer('adam', lr))
    model.load_weights(model_weights)
    
    model_checkpoint = ModelCheckpoint(model_out_file, monitor='val_loss', save_best_only=True)
    
    conv_layers = [layer for layer in model.layers if type(layer) is Convolution2D]
    for layer in conv_layers[-3:]:
        layer.trainable = True
    
    hist = model.fit(x=x_hold, y=hold_labels, 
                  batch_size=batch_size,
                  validation_data=(x_train,train_labels),
                  epochs=15,
                  verbose=1,
                  callbacks=[model_checkpoint])
    
    model.load_weights(model_out_file)
    tr_score = roc_auc_score(hold_labels, model.predict(hold_img)[:, 0])
    va_score = roc_auc_score(train_labels, model.predict(train_img)[:, 0])
    
    print('train score, valid score:')
    print(tr_score, va_score)

Train on 230 samples, validate on 2065 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
train score, valid score:
0.997356965174 0.995204770099
Train on 230 samples, validate on 2065 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
train score, valid score:
0.990593905473 0.997108877661
