# setup 

In [1]:
# List the compute devices TensorFlow can see, to check that the GPU is visible.
from tensorflow.python.client import device_lib
device_lib.list_local_devices() 

[name: "/cpu:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 2971641885611843514, name: "/gpu:0"
 device_type: "GPU"
 memory_limit: 11332668621
 locality {
   bus_id: 2
 }
 incarnation: 15488722034800260466
 physical_device_desc: "device: 0, name: Tesla K80, pci bus id: 0000:84:00.0"]

In [2]:
import numpy as np
import pandas as pd

from scipy import ndimage
from scipy import misc

import os, gc, sys, glob
from tqdm import tqdm

from sklearn import model_selection
from sklearn import metrics

import keras
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Input
from keras.layers.normalization import BatchNormalization
from keras.metrics import categorical_accuracy
from keras.preprocessing.image import ImageDataGenerator

from keras.layers import Input, GlobalAveragePooling2D, GlobalMaxPooling2D
from keras.layers.core import Flatten, Dense, Dropout, Lambda
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D

from keras.callbacks import EarlyStopping, ModelCheckpoint

Using TensorFlow backend.


In [3]:
# Root of the dataset; every other location is a direct child of it.
DATA_DIR = os.path.join('/scratch', 'yns207', 'data_invasive')
path = DATA_DIR

# Sub-directories for the test images, saved results, training images and validation images.
test_path, models_path, train_path, valid_path = (
    os.path.join(path, sub_dir)
    for sub_dir in ('test', 'results', 'train', 'valid')
)

print(path)

/scratch/yns207/data_invasive


In [None]:
# Change into the data directory and extract every .7z archive found there.
# NOTE(review): each `!` line is executed in its own subshell, so the `module load`
# below presumably does not affect the environment of the following `7za` call —
# confirm on the cluster.
%cd $DATA_DIR
!module load centos/7
!7za x '*.7z'

# defining funcs

In [66]:
def conv_block(filter_depth, filter_size, pool_size, activation, inputs):
    """Apply one convolution layer followed by one max-pooling layer.

    filter_depth, filter_size and activation are forwarded to Convolution2D;
    pool_size is forwarded to MaxPooling2D. Returns the pooled output tensor.
    """
    conv_layer = Convolution2D(filter_depth, filter_size, activation=activation)
    pool_layer = MaxPooling2D(pool_size=pool_size)
    return pool_layer(conv_layer(inputs))

In [55]:
def dense_block(units, activation, drop_prob, inputs):
    """Apply a Dense layer to `inputs`, then Dropout with rate `drop_prob`.

    Returns the output tensor of the Dropout layer.
    """
    dense_out = Dense(units, activation=activation)(inputs)
    return Dropout(drop_prob)(dense_out)

In [56]:
def output_block(units, activation, inputs):
    """Apply a single Dense layer (no dropout) to `inputs` and return its output tensor."""
    return Dense(units, activation=activation)(inputs)

In [57]:
def grab_optimizer(opt, lr):
    """Build a Keras optimizer from its short name.

    Parameters
    ----------
    opt : str
        One of 'sgd', 'adam', 'adagrad' or 'rmsprop'.
    lr : float
        Learning rate handed to the optimizer.

    Returns
    -------
    The optimizer instance. 'sgd' is configured with decay=1e-6, momentum=0.8
    and Nesterov momentum; the others only receive the learning rate.

    Raises
    ------
    ValueError
        If `opt` is not a supported name. Previously the function fell through
        and returned None, which only failed later when the model was compiled.
    """
    # Factories are wrapped in lambdas so only the requested optimizer is built.
    factories = {
        'sgd': lambda: optimizers.SGD(lr=lr, decay=1e-6, momentum=0.8, nesterov=True),
        'adam': lambda: optimizers.Adam(lr=lr),
        'adagrad': lambda: optimizers.Adagrad(lr=lr),
        'rmsprop': lambda: optimizers.RMSprop(lr=lr),
    }
    if opt not in factories:
        raise ValueError(
            'unknown optimizer {!r}; expected one of {}'.format(opt, sorted(factories)))
    return factories[opt]()

In [None]:
def make_model1(input_shape, optimizer):
    """Build and compile the small CNN binary classifier.

    Architecture: four conv_block stages (16, 32, 64, 128 filters; 3x3 kernels,
    2x2 max pooling, ReLU), Flatten, two dense_block stages (2048 units with
    0.55 dropout, 512 units with 0.65 dropout) and a single sigmoid output unit.

    Parameters
    ----------
    input_shape : tuple
        Shape handed to the Keras Input layer, e.g. (128, 128, 3).
    optimizer
        Optimizer passed to model.compile (see grab_optimizer).

    Returns
    -------
    The compiled Model, using binary cross-entropy loss and an accuracy metric.
    """
    inputs = Input(shape=input_shape)

    m = inputs
    for filter_depth in (16, 32, 64, 128):
        m = conv_block(filter_depth, (3, 3), (2, 2), 'relu', inputs=m)

    m = Flatten()(m)
    m = dense_block(2048, 'relu', 0.55, inputs=m)
    m = dense_block(512, 'relu', 0.65, inputs=m)

    # Use output_block for the final layer. The previous dense_block(..., 0, ...)
    # call appended a Dropout layer with rate 0 after the sigmoid; that layer has
    # no weights, so the layers that do carry weights are unchanged.
    outputs = output_block(1, 'sigmoid', inputs=m)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

In [60]:
def make_preds(model, test_data):
    """Predict on `test_data` and return a submission frame.

    The result is a copy of the notebook-level `test_set` frame whose
    'invasive' column holds the flattened output of model.predict(test_data).

    Historical notes worth keeping:
    - predictions 1, 2 and 3 from jun21 had misaligned test names;
    - submission files 3 and 4 for jun21 are actually both submission 4 (accident).
    """
    submission = test_set.copy()
    submission['invasive'] = model.predict(test_data).flatten()
    return submission

In [61]:
# Load the training labels and the sample submission from the data directory.
train_set, test_set = (
    pd.read_csv(os.path.join(path, csv_name))
    for csv_name in ('train_labels.csv', 'sample_submission.csv')
)

In [62]:
# Peek at the first two rows of the training labels.
train_set.head(n=2)

Unnamed: 0,name,invasive
0,1,0
1,2,0


In [63]:
def read_img(img_path, img_shape):
    """Read the image at `img_path` and resize it to `img_shape`.

    img_shape should be a tuple (H, W): height, width.
    """
    return misc.imresize(misc.imread(img_path), img_shape)

In [64]:
def read_imgs(img_height, img_width):
    """Load every train and test image, resized to (img_height, img_width).

    Image names come from the 'name' column of the notebook-level `train_set`
    and `test_set` frames; files are read from the 'train' and 'test'
    sub-directories of `path` as '<name>.jpg'.

    Returns (train_img, test_img, train_label): the two image stacks as float32
    arrays divided by 255, and the 'invasive' column of `train_set` as an array.
    """
    target_shape = (img_height, img_width)

    def load_split(frame, sub_dir):
        # One resized image per row of `frame`, in row order, with a progress bar.
        return [
            read_img(os.path.join(path, sub_dir, str(name) + '.jpg'), target_shape)
            for name in tqdm(frame['name'].iloc[:])
        ]

    train_raw = load_split(train_set, 'train')
    test_raw = load_split(test_set, 'test')

    train_img = np.array(train_raw, np.float32) / 255
    test_img = np.array(test_raw, np.float32) / 255
    train_label = np.array(train_set['invasive'].iloc[:])

    return train_img, test_img, train_label

In [None]:
# read_imgs takes height and width as two separate arguments; passing a single
# (128, 128) tuple raised a TypeError for the missing img_width.
train_img, test_img, train_label = read_imgs(128, 128)

In [None]:
import matplotlib.pyplot as plt

# Show the first training image, resized to 300x400 for easier viewing.
preview = misc.imresize(train_img[0], (300, 400))
plt.imshow(preview)
plt.show()

In [None]:
# Hold out 20% of the training images for validation. The split is seeded so the
# same images are held out after a kernel restart; an unseeded split changes on
# every run, which makes saved models impossible to evaluate on their original
# validation data.
x_train, x_valid, y_train, y_valid = model_selection.train_test_split(
    train_img, train_label, test_size=0.20, random_state=42)

In [None]:
# try a few rounds of training

# Random augmentation applied to the training batches.
gen = ImageDataGenerator(rotation_range=30,
                         width_shift_range=0.2,
                         height_shift_range=0.2,
                         shear_range=0.2,
                         zoom_range=0.2,
                         horizontal_flip=True,
                         vertical_flip=True,
                         fill_mode='nearest')

# gen.fit(x_train) is only required for featurewise centering, ZCA whitening
# and a few other options, none of which are used here.

# Stop once val_loss has not improved for 7 consecutive epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=7, verbose=1, mode='auto')

model = make_model1((128, 128, 3), grab_optimizer('sgd', 0.01))

hist = model.fit_generator(
    gen.flow(x_train, y_train, batch_size=64),
    steps_per_epoch=(len(x_train) // 64) + 1,
    validation_data=(x_valid, y_valid),
    validation_steps=(len(x_valid) // 64) + 1,
    epochs=25,
    verbose=2,
    callbacks=[early_stopping],
)

OK, that seems to work. Now let's try some k-folds.

In [38]:
# ok now lets try a k folds

def train_model_k_folds(model, train_data, train_label, model_out, epochs, kfolds):
    """Train `model` with shuffled K-fold cross-validation, one checkpoint per fold.

    For each fold the model is trained on augmented batches of the fold's
    training split, the weights with the lowest val_loss are checkpointed to
    '/scratch/yns207/data_invasive/<model_out>_<fold>.model', those best weights
    are loaded back, and loss, accuracy and ROC AUC are printed for both the
    training and validation splits.

    Parameters
    ----------
    model
        Compiled Keras model (see make_model1). Its weights are left at the
        best checkpoint of the last fold when the function returns.
    train_data, train_label
        Arrays indexed by the fold indices produced by KFold.split(train_data).
    model_out : str
        Prefix of the checkpoint file names.
    epochs : int
        Maximum number of epochs per fold (early stopping may end a fold sooner).
    kfolds : int
        Number of folds.
    """
    kf = model_selection.KFold(n_splits=kfolds, shuffle=True)
    score_func = metrics.roc_auc_score

    # Snapshot the starting weights so every fold trains from the same point.
    # model.reset_states() only clears the state of stateful recurrent layers and
    # leaves trained weights untouched, so relying on it let each fold continue
    # from the previous fold's weights, which had already seen this fold's
    # validation samples.
    initial_weights = model.get_weights()

    for i, (train_ixs, valid_ixs) in enumerate(kf.split(train_data)):
        x_train = train_data[train_ixs]
        x_valid = train_data[valid_ixs]
        y_train = train_label[train_ixs]
        y_valid = train_label[valid_ixs]

        gen = ImageDataGenerator(
            rotation_range = 30,
            width_shift_range = 0.2,
            height_shift_range = 0.2,
            shear_range = 0.2,
            zoom_range = 0.2,
            horizontal_flip = True,
            vertical_flip = True,
            fill_mode = 'nearest')

        # gen.fit(x_train) is only required for featurewise centering, ZCA
        # whitening and a few other options, none of which are used here.

        # Start this fold from the untrained weights.
        # NOTE(review): optimizer state (e.g. SGD momentum) is not reset here.
        model.set_weights(initial_weights)

        checkpoint_path = '/scratch/yns207/data_invasive/{}_{}.model'.format(model_out, str(i))
        model_checkpoint = ModelCheckpoint(checkpoint_path,
                                           monitor='val_loss',
                                           save_best_only=True)
        early_stopping = EarlyStopping(monitor='val_loss', patience=7, verbose=1, mode='auto')

        model.fit_generator(gen.flow(x_train, y_train, batch_size=64),
                            steps_per_epoch=(len(x_train)//64) + 1,
                            validation_data=(x_valid,y_valid),
                            validation_steps=(len(x_valid)//64)+1,
                            epochs=epochs,
                            verbose=1,
                            callbacks=[early_stopping, model_checkpoint])

        # Score the best checkpoint of the fold, not the weights of the last epoch.
        model.load_weights(checkpoint_path)

        eval_tr = model.evaluate(x_train, y_train)
        eval_va = model.evaluate(x_valid, y_valid)

        tr_score = score_func(y_train, model.predict(x_train)[:, 0])
        va_score = score_func(y_valid, model.predict(x_valid)[:, 0])

        print('kfold: {}'.format(str(i)))
        print('best model train acc: {}, loss: {}'.format(eval_tr[1], eval_tr[0]))
        print('best model valid acc: {}, loss: {}'.format(eval_va[1], eval_va[0]))
        print('best model train aroc score: {}, valid aroc score: {}'.format(tr_score, va_score))
        print('\n')

In [None]:
# I changed the code a bit, so this just represents how to run the original jun21_4 submission.
model = make_model1((128,128,3), grab_optimizer('sgd', 0.005))
# train_model_k_folds expects the labels as its third argument; they were missing from the call.
train_model_k_folds(model, train_img, train_label, 'model_jun21_kfold', 50, 10)

In [None]:
# befori methodized it. so no longer valid is this
# this is actually using the final weights per epoch and we want the best weights for each epoch so
# lets load them for each and calculate area under roc curve
# this is wrong. this whole cell is wrong because i reloaded the test/train data and its df everytime so we probably evaluate
#mdoel 6 based on data it was trained on...not the original witheld valid data.
%cd $DATA_DIR
g = glob.glob('model_jun21_kfold_*.model')

model = make_model((128,128,3), grab_optimizer('sgd', 0.005))
x_train, x_valid, y_train, y_valid = model_selection.train_test_split(train_img, train_label, test_size=0.20)

for f in g:
    model.load_weights(f)
    tr_score = score_func(y_train, model.predict(x_train)[:, 0])
    va_score = score_func(y_valid, model.predict(x_valid)[:, 0])
    print('model: {}'.format(f))
    print('tr score: {}, va score: {}'.format(tr_score, va_score))

# summary

It looks like model 6 did the best here, so we should try to use model 6 to make predictions on the test set. Alternatively, one of the other ones, like model 4 or model 9, provides a lower validation area under the ROC curve but may generalize better.

In [None]:
%cd $DATA_DIR
model = make_model((128,128,3), grab_optimizer('sgd', 0.005))
model.load_weights('model_jun21_kfold_6.model')
subm = make_preds(model, test_img)
subm.to_csv(os.path.join(DATA_DIR, 'results', 'subm_june_21_2017_4.gz'), index=False, compression='gzip')

# Cool: submission 4 had a score of 0.95 on the leaderboard

OK, so my plan is to do, for every k-fold:
- one round of training with 128x128,
- then one with 225x300,
- then one with 450x600.

These should all maintain the aspect ratio.

Let's try 2 training rounds: one on small images, one on bigger images.


We cannot do multiple rounds of training with different image sizes. TODO: figure out why that worked/improved VGG before...

# summary

OK, that wasn't better than the best model of the 128x128 10-fold k-fold run (so I deleted it): it got us 0.958, lower than my current 0.959. I want to try ensembling all the models from the original 10-fold cross-validation: basically take an average and see what that does for us.

So I didn't really do the k-folds right. What I should have done is: for each fold, once it is trained, make predictions on that fold's held-out set, and as we go through all the folds collect those predictions and use them, instead of just picking the best model. But actually, since we didn't touch our test data at all in the k-folds (we generated the folds from train_imgs only), we can use the stored models to make predictions about the actual test set labels. (OK, actually we can't, because train_test_split works randomly and we've restarted the kernel since then. Never mind: we didn't pass in the test data.)

So what we have are 10 models, trained on different segments of the training data, which we can now combine to make a prediction about the held-off test set. For each item in the test set we can make a prediction using each model, then average them for the final prediction.

In [52]:
# Reload all images at 128x128.
train_img, test_img, train_label = read_imgs(img_height=128, img_width=128)

100%|██████████| 2295/2295 [01:14<00:00, 30.93it/s]
100%|██████████| 1531/1531 [00:48<00:00, 31.46it/s]


In [78]:
subm = test_set.iloc[:]
subm = subm.drop('invasive',1)

%cd $DATA_DIR
g = glob.glob('model_jun21_kfold_*.model')

model = make_model1((128,128,3), grab_optimizer('sgd', 0.005))

for f in g:
    model.load_weights(f)
    subm[f] = make_preds(model, test_img)['invasive']
    
subm.head()

/scratch/yns207/data_invasive


Unnamed: 0,name,model_jun21_kfold_6.model,model_jun21_kfold_0.model,model_jun21_kfold_7.model,model_jun21_kfold_2.model,model_jun21_kfold_1.model,model_jun21_kfold_9.model,model_jun21_kfold_5.model,model_jun21_kfold_3.model,model_jun21_kfold_4.model,model_jun21_kfold_8.model
0,1,0.996251,0.990665,0.991979,0.992946,0.995229,0.992048,0.986676,0.98466,0.997173,0.993619
1,2,0.115335,0.096591,0.189778,0.114597,0.17219,0.112734,0.218326,0.141875,0.09681,0.115411
2,3,0.150518,0.104988,0.202259,0.121986,0.15194,0.115175,0.27847,0.158022,0.098422,0.175202
3,4,0.102934,0.118156,0.256066,0.134449,0.259272,0.116221,0.26241,0.190198,0.101841,0.138025
4,5,0.99481,0.992621,0.94018,0.988019,0.941458,0.993243,0.944794,0.925843,0.97686,0.992455


In [81]:
# Ensemble prediction: row-wise mean of the per-model prediction columns.
subm['invasive'] = subm[list(g)].mean(axis='columns')

In [84]:
# Drop the per-model columns, leaving only the name and the averaged prediction.
# `axis` is passed by keyword: the positional form drop(labels, 1) is no longer
# accepted by current pandas.
subm = subm.drop(list(g), axis=1)

In [85]:
# Check the first five rows of the averaged submission.
subm.head(5)

Unnamed: 0,name,invasive
0,1,0.992125
1,2,0.137365
2,3,0.155698
3,4,0.167957
4,5,0.969028


In [86]:
# Write the ensemble submission as a gzip-compressed CSV without the index column.
subm.to_csv(
    path_or_buf=os.path.join(DATA_DIR, 'results', 'subm_jun22_17_1.gz'),
    index=False,
    compression='gzip',
)

OK, so this ensemble actually made the score worse: 0.952 instead of 0.959.

# REMINDER

When you change models or anything else, move to a new notebook; you probably need a new one every day. You keep changing things and it's messing up the stored weights, like adding batch norm and zero padding to layers that didn't have them. What I'm going to do is revert this notebook to its pure state (what it did before), then make a copy and do further work in a new notebook.