Fit an Xception model on 450x450 images

In [1]:
import os, glob, bcolz, gc

import numpy as np
import pandas as pd

from tqdm import tqdm
from scipy import ndimage, misc

from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import roc_auc_score

import keras
from keras import backend as K
from keras import optimizers
from keras.models import Model, load_model
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, Callback
from keras.preprocessing.image import ImageDataGenerator

from keras.layers.normalization import BatchNormalization
from keras.layers import Dense, Dropout, Flatten, Activation, Input, concatenate, GlobalAveragePooling2D
from keras.layers.convolutional import MaxPooling2D, Convolution2D, AveragePooling2D

from keras.applications.xception import Xception
from keras.applications.xception import preprocess_input as preprocess_input_xcep

from tensorflow.python.client import device_lib
# Display the devices TensorFlow reports together with the imported Keras version.
device_lib.list_local_devices(), 'keras version: {}'.format(keras.__version__)

Using TensorFlow backend.


([name: "/cpu:0"
  device_type: "CPU"
  memory_limit: 268435456
  locality {
  }
  incarnation: 3858558907543186088, name: "/gpu:0"
  device_type: "GPU"
  memory_limit: 11332668621
  locality {
    bus_id: 2
  }
  incarnation: 18356050221539755011
  physical_device_desc: "device: 0, name: Tesla K80, pci bus id: 0000:85:00.0"],
 'keras version: 2.0.6')

In [2]:
def delete_model(model, clear_session=True):
    '''Drop the given model reference, run a garbage-collection pass and,
    unless `clear_session` is falsy, clear the Keras backend session.

    `del` only removes this function's local name; a caller that still holds
    its own reference keeps the object alive.
    '''
    del model
    gc.collect()
    if not clear_session:
        return
    K.clear_session()
        
def save_array(fname, arr):
    '''Write `arr` to disk as a bcolz carray rooted at `fname` (mode 'w')
    and flush it.
    '''
    bcolz.carray(arr, rootdir=fname, mode='w').flush()

def load_array(fname):
    '''Open the bcolz store at `fname` and return its full contents
    (the `[:]` slice of the opened carray).
    '''
    stored = bcolz.open(fname)
    return stored[:]

def grab_optimizer(opt, lr):
    '''Build a Keras optimizer from a short name.

    opt: 'sgd'  -> SGD with decay=1e-6, momentum=0.8, nesterov=True
         'adam' -> Adam with beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0
    lr:  learning rate handed to the optimizer.

    Raises ValueError for any other name. Previously an unknown name fell
    through every branch and silently returned None.
    '''
    if opt == 'sgd':
        return optimizers.SGD(lr=lr, decay=1e-6, momentum=0.8, nesterov=True)
    if opt == 'adam':
        return optimizers.Adam(lr=lr, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    raise ValueError("unknown optimizer '{}': expected 'sgd' or 'adam'".format(opt))
    
def freeze_model(model):
    '''Set `trainable = False` on every layer in `model.layers` and return
    the same model object.
    '''
    for lyr in model.layers:
        setattr(lyr, 'trainable', False)
    return model

def unfreeze_model(model):
    '''Set `trainable = True` on every layer in `model.layers` and return
    the same model object.
    '''
    for lyr in model.layers:
        setattr(lyr, 'trainable', True)
    return model

def dense_block(units, activation, drop_prob, inputs):
    '''Apply BatchNormalization -> Dense(units, no activation) ->
    Activation(activation) -> Dropout(drop_prob) to `inputs` and return the
    resulting tensor.
    '''
    normed = BatchNormalization()(inputs)
    projected = Dense(units, activation=None)(normed)
    activated = Activation(activation)(projected)
    return Dropout(drop_prob)(activated)

def make_xcep(input_shape):
    '''Build a binary classifier on top of a frozen, ImageNet-initialised
    Xception base (no top, no pooling).

    The output of the base's last layer is flattened, passed through a
    512-unit relu dense_block with 0.25 dropout, then through a 1-unit
    sigmoid dense_block. The returned Model is not compiled.
    '''
    backbone = freeze_model(
        Xception(input_shape=input_shape, weights='imagenet', include_top=False, pooling=None))
    features = Flatten()(backbone.layers[-1].output)
    hidden = dense_block(512, 'relu', 0.25, inputs=features)
    prediction = dense_block(1, 'sigmoid', 0, inputs=hidden)
    return Model(inputs=backbone.input, outputs=prediction)

# does not use precomputation so it can use data augmentation
def train_kfolds(train_data, train_label, model_out, train_datagen, epochs, kfolds, batch_size):
    '''Train one Xception model per cross-validation fold, in two phases.

    For each fold a fresh model from make_xcep is compiled and trained for
    `epochs` epochs, then every layer whose type is exactly Convolution2D is
    made trainable, the model is recompiled and trained for `epochs * 4` more
    epochs. The checkpoint saved by ModelCheckpoint (best val_loss) is then
    reloaded and scored on the fold's train and validation splits.

    train_data:    indexable array of images (indexed with the fold indices).
    train_label:   array of labels aligned with train_data.
    model_out:     prefix for the per-fold checkpoint '<prefix>_<fold>.model'
                   (written relative to the current working directory) and
                   for the '<prefix>_history.out' log.
    train_datagen: object whose .flow(x, y, batch_size=..., shuffle=...)
                   yields training batches.
    epochs:        epochs for the first phase; the second runs epochs * 4.
    kfolds:        number of KFold splits.
    batch_size:    batch size used by the training generator.

    Returns a dict keyed by checkpoint file name holding, per fold, the
    train/valid ROC AUC scores and the train/valid losses.

    NOTE: the history log goes under the module-level `results_path`, which
    must be defined before this function is called.
    '''
    # Resolve the log location up front so a missing `results_path` or an
    # uncreatable directory fails before any training time is spent.
    if not os.path.isdir(results_path):
        os.makedirs(results_path)
    history_file = os.path.join(results_path, '{}_{}.out'.format(model_out, 'history'))

    kf = KFold(n_splits=kfolds, shuffle=True)

    models_stats = {}
    for i, (train_ixs, valid_ixs) in enumerate(kf.split(train_data)):
        x_train = train_data[train_ixs]
        x_valid = train_data[valid_ixs]
        y_train = train_label[train_ixs]
        y_valid = train_label[valid_ixs]

        # ceil(len(x_train) / batch_size). The former `len // batch_size + 1`
        # requested one batch too many whenever the fold size was an exact
        # multiple of batch_size.
        steps_per_epoch = (len(x_train) + batch_size - 1) // batch_size

        model = make_xcep(x_train[0].shape)
        model.compile(loss='binary_crossentropy', optimizer=grab_optimizer('adam', 0.00025))

        model_out_file = '{}_{}.model'.format(model_out, str(i))
        model_checkpoint = ModelCheckpoint(model_out_file,
                                           monitor='val_loss',
                                           save_best_only=True)

        reduce_lr = ReduceLROnPlateau(monitor='loss',
                                      patience=7,
                                      verbose=1,
                                      factor=0.1,
                                      cooldown=10,
                                      min_lr=0.00001)

        # The same callback instances are used for both training phases.
        callbacks = [model_checkpoint, reduce_lr]

        # Phase 1: the Xception base is frozen by make_xcep.
        model.fit_generator(train_datagen.flow(x_train, y_train, batch_size=batch_size, shuffle=True),
                            steps_per_epoch=steps_per_epoch,
                            validation_data=(x_valid, y_valid),
                            epochs=epochs,
                            callbacks=callbacks)

        # Phase 2: unfreeze the plain convolution layers and keep training.
        # NOTE(review): the exact-type check skips subclasses and any other
        # convolution layer types in the base model - confirm this is intended.
        for layer in model.layers:
            if type(layer) is Convolution2D:
                layer.trainable = True
        # Recompile after changing the `trainable` flags.
        model.compile(loss='binary_crossentropy', optimizer=grab_optimizer('adam', 0.00025))

        model.fit_generator(train_datagen.flow(x_train, y_train, batch_size=batch_size, shuffle=True),
                            steps_per_epoch=steps_per_epoch,
                            validation_data=(x_valid, y_valid),
                            epochs=epochs * 4,
                            callbacks=callbacks)

        # Score the checkpointed model rather than the last-epoch weights.
        model = load_model(model_out_file)

        eval_tr = model.evaluate(x_train, y_train)
        eval_va = model.evaluate(x_valid, y_valid)

        tr_score = roc_auc_score(np.around(y_train), model.predict(x_train)[:, 0])
        va_score = roc_auc_score(np.around(y_valid), model.predict(x_valid)[:, 0])

        report = [
            'kfold: {}'.format(str(i)),
            'best model train loss: {}'.format(eval_tr),
            'best model valid loss: {}'.format(eval_va),
            'best model train aroc score: {}, valid aroc score: {}'.format(tr_score, va_score),
        ]

        print('\n')
        for line in report:
            print(line)
        print('\n')
        models_stats[model_out_file] = {'score_tr_va':[tr_score, va_score], 'train_loss':[eval_tr], 'val_loss':[eval_va]}

        # One field per line; the fields used to be written back to back with
        # no separator, producing a single run-together line per fold.
        with open(history_file, 'a') as f:
            f.write('\n'.join(report) + '\n')

        delete_model(model)

    return models_stats

In [3]:
# Dataset root directory and the sub-directories derived from it.
path = os.path.join('/scratch', 'yns207', 'data_invasive')
results_path, train_path, valid_path = [
    os.path.join(path, subdir) for subdir in ('results', 'train', 'valid')
]

In [4]:
# train_labels.csv -> train_set, sample_submission.csv -> test_set (both read from `path`).
train_set, test_set = [
    pd.read_csv(os.path.join(path, csv_name))
    for csv_name in ('train_labels.csv', 'sample_submission.csv')
]

def read_img(img_path, img_shape):
    '''Read the image at `img_path` with misc.imread and return it resized
    to `img_shape` with misc.imresize.
    '''
    return misc.imresize(misc.imread(img_path), img_shape)

def read_imgs(img_height, img_width):
    '''Load every image named in train_set['name'] from
    <path>/train/<name>.jpg, resized to (img_height, img_width), and return
    them stacked in a single numpy array.

    Relies on the module-level `train_set` and `path`; progress is shown
    with tqdm.
    '''
    target_shape = (img_height, img_width)
    images = [
        read_img(os.path.join(path, 'train', str(name)+'.jpg'), target_shape)
        for name in tqdm(train_set['name'].iloc[:])
    ]
    return np.array(images)

In [5]:
# Load all training images resized to 450x450.
train_img = read_imgs(450,450)
# Xception preprocessing is applied to a float32 copy made by astype.
x_train = preprocess_input_xcep(train_img.astype(np.float32))
# Labels come from the 'invasive' column of the label table.
y_train = train_set['invasive'].values

100%|██████████| 2295/2295 [01:17<00:00, 29.67it/s]


In [None]:
# Cross-validation / training settings.
kfolds = 5
batch_size = 2
epochs = 10

# Prefix for the per-fold checkpoint files and the history log.
model_file = 'invasive_xcep_aug12_kfolds_450x450'
model_init = '{}_init.model'.format(model_file)

# Augmentation applied on the fly to the training batches.
train_datagen = ImageDataGenerator(
        rotation_range=30, 
        shear_range=0.2,
        width_shift_range=0.1,
        height_shift_range=0.1, 
        horizontal_flip=True)

# No train_datagen.fit(x_train) here: fit() only computes statistics for
# featurewise_center / featurewise_std_normalization / zca_whitening, none of
# which is enabled above, so the call only made an extra full copy of x_train.

In [None]:
%cd $path

perf = train_kfolds(x_train,
            y_train,
            model_file,
            train_datagen,
            epochs,
            kfolds,
            batch_size)

/scratch/yns207/data_invasive
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 00008: reducing learning rate to 2.5000001187436283e-05.
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40

In [8]:
# Show the per-fold stats returned by train_kfolds; `perf` only exists once the training cell has run to completion.
perf

NameError: name 'perf' is not defined