# overview

ok so i'm gonna try 2 things:

thing # 1: finetuned models, finetuning only two 1024-unit dense layers:

aug3-work-1 (running)

this has several computational advantages (precomputing layers) and is already running.

thing # 2 is going to be multiple gpus (as many as i can get) training:

aug3-work-vgg19 (running)

aug3-work-resnet50 (running)

aug3-work-inceptionv3 (running)

aug3-work-xception (not running yet, gpu limit is 4)

aug3-work-custom (custom conv net, not running yet, gpu limit)

they will retrain up to 3 conv layers.
they will not precompute as it makes using data augmentation very difficult (because of the way the image data generator works).

this will take longer but w/ multi gpus it should go faster.

then i want to ensemble all of them (maybe throwing in an xgboost from that kernel and/or some random forests)

it all needs to use the same holdout data for kfolds and training the final model:

```
%cd $models_path
train_img = load_array('aug_3_train_img.dat')
hold_img = load_array('aug_3_hold_img.dat')
train_labels = load_array('aug_3_train_labels.dat')
hold_labels = load_array('aug_3_hold_labels.dat')
```

then we'll use the ensemble to pseudo label and do another round of training (maybe kfolded, maybe not) on that data for each model. then we'll include those models in the ensemble.

i want the blending to happen in its own notebook.

lets do this!

# ensembling

ok now lets ensemble the models we have:

dense ft: 

vgg19

xception

inceptionv3

resnet50

conv ft:

vgg19

xception

inceptionv3

resnet50

other models:

random forest ft

custom conv net

In [1]:
# List the compute devices TensorFlow reports on this machine (shown as the cell output).
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/cpu:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 4318587422002395122, name: "/gpu:0"
 device_type: "GPU"
 memory_limit: 11332668621
 locality {
   bus_id: 2
 }
 incarnation: 259090500954017658
 physical_device_desc: "device: 0, name: Tesla K80, pci bus id: 0000:84:00.0"]

In [76]:
import os, glob, bcolz, gc

import numpy as np
import pandas as pd

from tqdm import tqdm
from scipy import ndimage, misc
from scipy.stats import rankdata

from sklearn.externals import joblib
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import MinMaxScaler

from keras import backend as K
from keras import optimizers
from keras.models import Model
from keras.callbacks import ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator

from keras.layers.normalization import BatchNormalization
from keras.layers import Dense, Dropout, Flatten, Activation, Input
from keras.layers.convolutional import MaxPooling2D, Convolution2D
from keras.layers.advanced_activations import PReLU, LeakyReLU

from keras.applications.vgg19 import VGG19
from keras.applications.resnet50 import ResNet50
from keras.applications.inception_v3 import InceptionV3
from keras.applications.xception import Xception
from keras.applications.inception_v3 import preprocess_input as preprocess_input_incep_xcep
from keras.applications.imagenet_utils import preprocess_input as preprocess_input_vgg_resnet

import matplotlib.image as mpimg
import matplotlib.pyplot as plt

In [3]:
def grab_optimizer(opt, lr):
    """Build a Keras optimizer from a short name.

    Args:
        opt: One of 'sgd', 'adam', 'adagrad' or 'rmsprop'.
        lr: Learning rate handed to the optimizer.

    Returns:
        The matching ``keras.optimizers`` instance.

    Raises:
        ValueError: If ``opt`` is not one of the supported names. Without this
            the function would fall through and return None, and the mistake
            would only surface later when a model is compiled.
    """
    if opt == 'sgd':
        # SGD is the only choice that carries extra fixed settings.
        return optimizers.SGD(lr=lr, decay=1e-6, momentum=0.8, nesterov=True)
    elif opt == 'adam':
        return optimizers.Adam(lr=lr)
    elif opt == 'adagrad':
        return optimizers.Adagrad(lr=lr)
    elif opt == 'rmsprop':
        return optimizers.RMSprop(lr=lr)
    raise ValueError(
        'unknown optimizer %r; expected one of: sgd, adam, adagrad, rmsprop' % (opt,))

def save_array(fname, arr):
    """Write ``arr`` to disk as a bcolz carray rooted at ``fname``.

    The carray is created with mode='w' and flushed before returning.
    """
    bcolz.carray(arr, rootdir=fname, mode='w').flush()

def load_array(fname):
    """Open the bcolz array stored at ``fname`` and return its full contents."""
    stored = bcolz.open(fname)
    # Slicing with [:] reads the whole array out of the on-disk container.
    return stored[:]

def freeze_model(model):
    """Set ``trainable = False`` on every layer of ``model``.

    The model is modified in place and the same object is returned so the
    call can be chained.
    """
    for frozen_layer in model.layers:
        frozen_layer.trainable = False
    return model

def conv_block(filter_depth, filter_size, pool_size, activation, inputs):
    """Apply conv -> batch norm -> activation -> max pool to ``inputs``.

    The convolution is created with ``activation=None`` so that the
    non-linearity is applied after batch normalization.
    """
    stages = (
        Convolution2D(filter_depth, filter_size, activation=None),
        BatchNormalization(),
        Activation(activation),
        MaxPooling2D(pool_size=pool_size),
    )
    tensor = inputs
    for stage in stages:
        tensor = stage(tensor)
    return tensor

def dense_block(units, activation, drop_prob, inputs):
    """Apply dense -> batch norm -> activation -> dropout to ``inputs``.

    The dense layer is created with ``activation=None`` so that the
    non-linearity is applied after batch normalization.
    """
    stages = (
        Dense(units, activation=None),
        BatchNormalization(),
        Activation(activation),
        Dropout(drop_prob),
    )
    tensor = inputs
    for stage in stages:
        tensor = stage(tensor)
    return tensor

def make_conv_model(input_shape, optimizer):
    """Build and compile the custom conv net.

    Five conv blocks (16 to 256 filters, 3x3 kernels, 2x2 pooling) feed two
    2048-unit dense blocks and a single sigmoid output unit. The model is
    compiled with binary cross-entropy loss and an accuracy metric.
    """
    inputs = Input(shape=input_shape)

    m = inputs
    for depth in (16, 32, 64, 128, 256):
        m = conv_block(depth, (3,3), (2,2), 'relu', inputs=m)
    m = Flatten()(m)
    for drop_prob in (0.25, 0.5):
        m = dense_block(2048, 'relu', drop_prob, inputs=m)
    outputs = dense_block(1, 'sigmoid', 0, inputs=m)

    net = Model(inputs=inputs, outputs=outputs)
    net.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return net

def make_vgg19_conv(input_shape):
    """Return a frozen VGG19 base (imagenet weights, no top) for feature extraction."""
    return freeze_model(
        VGG19(input_shape=input_shape, weights='imagenet', include_top=False))

def make_incepv3_conv(input_shape):
    """Return a frozen InceptionV3 base (imagenet weights, no top) for feature extraction."""
    return freeze_model(
        InceptionV3(input_shape=input_shape, weights='imagenet', include_top=False))

def make_resnet50_conv(input_shape):
    """Return a frozen ResNet50 base (imagenet weights, no top) for feature extraction."""
    return freeze_model(
        ResNet50(input_shape=input_shape, weights='imagenet', include_top=False))

def make_xception_conv(input_shape):
    """Return a frozen Xception base (imagenet weights, no top) for feature extraction."""
    return freeze_model(
        Xception(input_shape=input_shape, weights='imagenet', include_top=False))

def make_ft_dense(input_shape, optimizer):
    """Build and compile the dense head that sits on precomputed conv features.

    The input is flattened, passed through two 1024-unit dense blocks and a
    single sigmoid output unit. Compiled with binary cross-entropy loss and
    an accuracy metric.
    """
    inputs = Input(shape=input_shape)

    head = Flatten()(inputs)
    for drop_prob in (0.25, 0.5):
        head = dense_block(1024, 'relu', drop_prob, inputs=head)
    outputs = dense_block(1, 'sigmoid', 0, inputs=head)

    net = Model(inputs=inputs, outputs=outputs)
    net.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return net

def make_vgg19_ft(input_shape, optimizer):
    """Build and compile a frozen VGG19 base with a trainable dense head.

    The last base layer's output is flattened and fed through two 1024-unit
    dense blocks and a single sigmoid output unit. Compiled with binary
    cross-entropy loss and an accuracy metric.
    """
    backbone = freeze_model(
        VGG19(input_shape=input_shape, weights='imagenet', include_top=False))

    head = Flatten()(backbone.layers[-1].output)
    for drop_prob in (0.25, 0.5):
        head = dense_block(1024, 'relu', drop_prob, inputs=head)
    outputs = dense_block(1, 'sigmoid', 0, inputs=head)

    net = Model(inputs=backbone.input, outputs=outputs)
    net.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return net

def make_incepv3_ft(input_shape, optimizer):
    """Build and compile a frozen InceptionV3 base with a trainable dense head.

    The base is created with ``pooling='avg'``, so its last layer's output
    goes straight into two 1024-unit dense blocks and a single sigmoid
    output unit. Compiled with binary cross-entropy loss and an accuracy
    metric.
    """
    backbone = freeze_model(
        InceptionV3(input_shape=input_shape, weights='imagenet', include_top=False, pooling='avg'))

    head = backbone.layers[-1].output
    for drop_prob in (0.25, 0.5):
        head = dense_block(1024, 'relu', drop_prob, inputs=head)
    outputs = dense_block(1, 'sigmoid', 0, inputs=head)

    net = Model(inputs=backbone.input, outputs=outputs)
    net.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return net

def make_resnet50_ft(input_shape, optimizer):
    """Build and compile a frozen ResNet50 base with a trainable dense head.

    The base is created with ``pooling='avg'``, so its last layer's output
    goes straight into two 1024-unit dense blocks and a single sigmoid
    output unit. Compiled with binary cross-entropy loss and an accuracy
    metric.
    """
    backbone = freeze_model(
        ResNet50(input_shape=input_shape, weights='imagenet', include_top=False, pooling='avg'))

    head = backbone.layers[-1].output
    for drop_prob in (0.25, 0.5):
        head = dense_block(1024, 'relu', drop_prob, inputs=head)
    outputs = dense_block(1, 'sigmoid', 0, inputs=head)

    net = Model(inputs=backbone.input, outputs=outputs)
    net.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return net

def make_xception_ft(input_shape, optimizer):
    """Build and compile a frozen Xception base with a trainable dense head.

    The base is created with ``pooling='avg'``, so its last layer's output
    goes straight into two 1024-unit dense blocks and a single sigmoid
    output unit. Compiled with binary cross-entropy loss and an accuracy
    metric.
    """
    backbone = freeze_model(
        Xception(input_shape=input_shape, weights='imagenet', include_top=False, pooling='avg'))

    head = backbone.layers[-1].output
    for drop_prob in (0.25, 0.5):
        head = dense_block(1024, 'relu', drop_prob, inputs=head)
    outputs = dense_block(1, 'sigmoid', 0, inputs=head)

    net = Model(inputs=backbone.input, outputs=outputs)
    net.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return net

In [4]:
# Results directory: the following cells %cd into it to load the saved arrays
# and glob it for model weight files.
path = '/scratch/yns207/data_invasive/results/'

In [5]:
%cd $path
train_img = load_array('aug_3_train_img.dat')
hold_img = load_array('aug_3_hold_img.dat')
train_labels = load_array('aug_3_train_labels.dat')
hold_labels = load_array('aug_3_hold_labels.dat')

/scratch/yns207/data_invasive/results


In [12]:
# Collect every results file tagged aug3, followed by every file tagged aug4.
model_files = []
for pattern in ('*aug3*', '*aug4*'):
    model_files.extend(glob.glob(path+pattern))
model_files

['/scratch/yns207/data_invasive/results/invasive_inceptionv3_conv_aug3_4.model',
 '/scratch/yns207/data_invasive/results/invasive_resnet50_aug3_1.model',
 '/scratch/yns207/data_invasive/results/invasive_vgg19_aug3_base.model',
 '/scratch/yns207/data_invasive/results/invasive_resnet50_aug3_2.model',
 '/scratch/yns207/data_invasive/results/invasive_vgg19_conv_aug3_2.model',
 '/scratch/yns207/data_invasive/results/invasive_resnet50_conv_aug3_base.model',
 '/scratch/yns207/data_invasive/results/invasive_inceptionv3_conv_aug3_history.out',
 '/scratch/yns207/data_invasive/results/invasive_inceptionv3_aug3_history.out',
 '/scratch/yns207/data_invasive/results/invasive_vgg19_conv_aug3_base.model',
 '/scratch/yns207/data_invasive/results/invasive_xception_aug3_0.model',
 '/scratch/yns207/data_invasive/results/invasive_inceptionv3_conv_aug3_base.model',
 '/scratch/yns207/data_invasive/results/crowd_dense_vgg19_aug3_base.model',
 '/scratch/yns207/data_invasive/results/invasive_inceptionv3_aug3_4.

# feature model ensemble (didn't really work)

In [13]:
# make df to hold preds
# Starts with only the holdout labels; the prediction cells below add one
# column per weight file, keyed by the file path.
preds_df = pd.DataFrame([], columns=['true_label'])
preds_df['true_label'] = hold_labels

In [14]:
# Score every xception weight file on the holdout images, adding one
# preds_df column per file.
# The preprocessed holdout batch is the same for every file, so build it once.
va_img_proc = preprocess_input_incep_xcep(hold_img.astype(np.float32))
# Conv features from the frozen base are also identical for every dense / rf
# head, so they are computed on first use and then reused across folds.
va_conv_feat = None

for weight_file in model_files:
    if 'history' in weight_file or 'base' in weight_file:
        continue
    if 'invasive_xception' not in weight_file:
        continue
    print(weight_file)
    if 'conv' in weight_file:
        # Conv fine-tuned weights: full base + head model, fed preprocessed images.
        model = make_xception_ft(hold_img[0].shape, grab_optimizer('sgd', 0.01))
        model.load_weights(weight_file)
        preds = model.predict(va_img_proc)
        del model
    else:
        if va_conv_feat is None:
            base_model = make_xception_conv(hold_img[0].shape)
            va_conv_feat = base_model.predict(va_img_proc)
            del base_model
            K.clear_session()
        if 'rf' in weight_file:
            rf = joblib.load(weight_file)
            # NOTE(review): predict() yields 0/1 values in the recorded output, not
            # probabilities; predict_proba may blend better - confirm before changing,
            # and keep the test-set cell consistent.
            preds = rf.predict(va_conv_feat.reshape((va_conv_feat.shape[0], -1)))
            del rf
        else:
            # The dense head takes the per-sample feature shape of the frozen base.
            dense_model = make_ft_dense(input_shape=tuple(va_conv_feat.shape[1:]), optimizer=grab_optimizer('sgd', 0.01))
            dense_model.load_weights(weight_file)
            preds = dense_model.predict(va_conv_feat)
            del dense_model
    preds_df[weight_file] = pd.Series(preds.flatten())
    # Release the Keras graph between weight files.
    K.clear_session()
    gc.collect()
preds_df.head()

/scratch/yns207/data_invasive/results/invasive_xception_aug3_0.model
/scratch/yns207/data_invasive/results/invasive_xception_aug3_2.model
/scratch/yns207/data_invasive/results/invasive_xception_aug3_4.model
/scratch/yns207/data_invasive/results/invasive_xception_aug3_1.model
/scratch/yns207/data_invasive/results/invasive_xception_aug3_3.model
/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_3.model
/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_0.model
/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_1.model
/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_4.model
/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_2.model


Unnamed: 0,true_label,/scratch/yns207/data_invasive/results/invasive_xception_aug3_0.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_2.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_4.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_1.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_3.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_3.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_0.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_1.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_4.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_2.model
0,0,0.46922,0.223394,0.335596,0.411062,0.496066,0.371322,0.14027,0.022839,0.206533,0.402066
1,0,0.085716,0.097333,0.08435,0.080826,0.085468,0.431893,0.211671,0.01597,0.235057,0.420486
2,0,0.166374,0.10468,0.142032,0.10077,0.091655,0.526311,0.259209,0.023033,0.26356,0.488666
3,1,0.811206,0.65484,0.788082,0.678479,0.605769,0.544119,0.264777,0.021816,0.260208,0.496182
4,1,0.912067,0.922749,0.923697,0.91279,0.91822,0.432768,0.197861,0.024539,0.217964,0.421262


In [15]:
# Score every inceptionv3 weight file on the holdout images, adding one
# preds_df column per file.
# The preprocessed holdout batch is the same for every file, so build it once.
va_img_proc = preprocess_input_incep_xcep(hold_img.astype(np.float32))
# Conv features from the frozen base are also identical for every dense / rf
# head, so they are computed on first use and then reused across folds.
va_conv_feat = None

for weight_file in model_files:
    if 'history' in weight_file or 'base' in weight_file:
        continue
    if 'invasive_inceptionv3' not in weight_file:
        continue
    print(weight_file)
    if 'conv' in weight_file:
        # Conv fine-tuned weights: full base + head model, fed preprocessed images.
        model = make_incepv3_ft(hold_img[0].shape, grab_optimizer('sgd', 0.01))
        model.load_weights(weight_file)
        preds = model.predict(va_img_proc)
        del model
    else:
        if va_conv_feat is None:
            base_model = make_incepv3_conv(hold_img[0].shape)
            va_conv_feat = base_model.predict(va_img_proc)
            del base_model
            K.clear_session()
        if 'rf' in weight_file:
            rf = joblib.load(weight_file)
            # NOTE(review): predict() yields 0/1 values in the recorded output, not
            # probabilities; predict_proba may blend better - confirm before changing,
            # and keep the test-set cell consistent.
            preds = rf.predict(va_conv_feat.reshape((va_conv_feat.shape[0], -1)))
            del rf
        else:
            # The dense head takes the per-sample feature shape of the frozen base.
            dense_model = make_ft_dense(input_shape=tuple(va_conv_feat.shape[1:]), optimizer=grab_optimizer('sgd', 0.01))
            dense_model.load_weights(weight_file)
            preds = dense_model.predict(va_conv_feat)
            del dense_model
    preds_df[weight_file] = pd.Series(preds.flatten())
    # Release the Keras graph between weight files.
    K.clear_session()
    gc.collect()
preds_df.head()

/scratch/yns207/data_invasive/results/invasive_inceptionv3_conv_aug3_4.model
/scratch/yns207/data_invasive/results/invasive_inceptionv3_aug3_4.model
/scratch/yns207/data_invasive/results/invasive_inceptionv3_aug3_1.model
/scratch/yns207/data_invasive/results/invasive_inceptionv3_aug3_3.model
/scratch/yns207/data_invasive/results/invasive_inceptionv3_conv_aug3_1.model
/scratch/yns207/data_invasive/results/invasive_inceptionv3_conv_aug3_3.model
/scratch/yns207/data_invasive/results/invasive_inceptionv3_conv_aug3_2.model
/scratch/yns207/data_invasive/results/invasive_inceptionv3_aug3_2.model
/scratch/yns207/data_invasive/results/invasive_inceptionv3_conv_aug3_0.model
/scratch/yns207/data_invasive/results/invasive_inceptionv3_aug3_0.model
/scratch/yns207/data_invasive/results/invasive_inceptionv3_rf_aug4_2.pkl
/scratch/yns207/data_invasive/results/invasive_inceptionv3_rf_aug4_4.pkl
/scratch/yns207/data_invasive/results/invasive_inceptionv3_rf_aug4_3.pkl
/scratch/yns207/data_invasive/result

Unnamed: 0,true_label,/scratch/yns207/data_invasive/results/invasive_xception_aug3_0.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_2.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_4.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_1.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_3.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_3.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_0.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_1.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_4.model,...,/scratch/yns207/data_invasive/results/invasive_inceptionv3_conv_aug3_3.model,/scratch/yns207/data_invasive/results/invasive_inceptionv3_conv_aug3_2.model,/scratch/yns207/data_invasive/results/invasive_inceptionv3_aug3_2.model,/scratch/yns207/data_invasive/results/invasive_inceptionv3_conv_aug3_0.model,/scratch/yns207/data_invasive/results/invasive_inceptionv3_aug3_0.model,/scratch/yns207/data_invasive/results/invasive_inceptionv3_rf_aug4_2.pkl,/scratch/yns207/data_invasive/results/invasive_inceptionv3_rf_aug4_4.pkl,/scratch/yns207/data_invasive/results/invasive_inceptionv3_rf_aug4_3.pkl,/scratch/yns207/data_invasive/results/invasive_inceptionv3_rf_aug4_1.pkl,/scratch/yns207/data_invasive/results/invasive_inceptionv3_rf_aug4_0.pkl
0,0,0.46922,0.223394,0.335596,0.411062,0.496066,0.371322,0.14027,0.022839,0.206533,...,0.006283,0.002278,0.093413,0.000803,0.112094,0,0,0,0,0
1,0,0.085716,0.097333,0.08435,0.080826,0.085468,0.431893,0.211671,0.01597,0.235057,...,0.008425,0.004075,0.171133,0.000974,0.165162,0,0,0,0,0
2,0,0.166374,0.10468,0.142032,0.10077,0.091655,0.526311,0.259209,0.023033,0.26356,...,0.020595,0.007241,0.103921,0.001312,0.115543,0,0,0,0,0
3,1,0.811206,0.65484,0.788082,0.678479,0.605769,0.544119,0.264777,0.021816,0.260208,...,0.018988,0.008517,0.905153,0.001279,0.911214,1,1,1,1,1
4,1,0.912067,0.922749,0.923697,0.91279,0.91822,0.432768,0.197861,0.024539,0.217964,...,0.005708,0.002595,0.929366,0.001055,0.913095,1,1,1,1,1


In [16]:
# Score every resnet50 weight file on the holdout images, adding one
# preds_df column per file.
# The preprocessed holdout batch is the same for every file, so build it once.
va_img_proc = preprocess_input_vgg_resnet(hold_img.astype(np.float32))
# Conv features from the frozen base are also identical for every dense / rf
# head, so they are computed on first use and then reused across folds.
va_conv_feat = None

for weight_file in model_files:
    if 'history' in weight_file or 'base' in weight_file:
        continue
    if 'invasive_resnet50' not in weight_file:
        continue
    print(weight_file)
    if 'conv' in weight_file:
        # Conv fine-tuned weights: full base + head model, fed preprocessed images.
        model = make_resnet50_ft(hold_img[0].shape, grab_optimizer('sgd', 0.01))
        model.load_weights(weight_file)
        preds = model.predict(va_img_proc)
        del model
    else:
        if va_conv_feat is None:
            base_model = make_resnet50_conv(hold_img[0].shape)
            va_conv_feat = base_model.predict(va_img_proc)
            del base_model
            K.clear_session()
        if 'rf' in weight_file:
            rf = joblib.load(weight_file)
            # NOTE(review): predict() presumably returns class labels rather than
            # probabilities; predict_proba may blend better - confirm before changing,
            # and keep the test-set cell consistent.
            preds = rf.predict(va_conv_feat.reshape((va_conv_feat.shape[0], -1)))
            del rf
        else:
            # The dense head takes the per-sample feature shape of the frozen base.
            dense_model = make_ft_dense(input_shape=tuple(va_conv_feat.shape[1:]), optimizer=grab_optimizer('sgd', 0.01))
            dense_model.load_weights(weight_file)
            preds = dense_model.predict(va_conv_feat)
            del dense_model
    preds_df[weight_file] = pd.Series(preds.flatten())
    # Release the Keras graph between weight files.
    K.clear_session()
    gc.collect()
preds_df.head()

/scratch/yns207/data_invasive/results/invasive_resnet50_aug3_1.model
/scratch/yns207/data_invasive/results/invasive_resnet50_aug3_2.model
/scratch/yns207/data_invasive/results/invasive_resnet50_conv_aug3_3.model
/scratch/yns207/data_invasive/results/invasive_resnet50_aug3_3.model
/scratch/yns207/data_invasive/results/invasive_resnet50_conv_aug3_0.model
/scratch/yns207/data_invasive/results/invasive_resnet50_aug3_0.model
/scratch/yns207/data_invasive/results/invasive_resnet50_conv_aug3_1.model
/scratch/yns207/data_invasive/results/invasive_resnet50_conv_aug3_2.model
/scratch/yns207/data_invasive/results/invasive_resnet50_conv_aug3_4.model
/scratch/yns207/data_invasive/results/invasive_resnet50_aug3_4.model


Unnamed: 0,true_label,/scratch/yns207/data_invasive/results/invasive_xception_aug3_0.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_2.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_4.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_1.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_3.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_3.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_0.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_1.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_4.model,...,/scratch/yns207/data_invasive/results/invasive_resnet50_aug3_1.model,/scratch/yns207/data_invasive/results/invasive_resnet50_aug3_2.model,/scratch/yns207/data_invasive/results/invasive_resnet50_conv_aug3_3.model,/scratch/yns207/data_invasive/results/invasive_resnet50_aug3_3.model,/scratch/yns207/data_invasive/results/invasive_resnet50_conv_aug3_0.model,/scratch/yns207/data_invasive/results/invasive_resnet50_aug3_0.model,/scratch/yns207/data_invasive/results/invasive_resnet50_conv_aug3_1.model,/scratch/yns207/data_invasive/results/invasive_resnet50_conv_aug3_2.model,/scratch/yns207/data_invasive/results/invasive_resnet50_conv_aug3_4.model,/scratch/yns207/data_invasive/results/invasive_resnet50_aug3_4.model
0,0,0.46922,0.223394,0.335596,0.411062,0.496066,0.371322,0.14027,0.022839,0.206533,...,0.143757,0.146213,0.04161,0.11141,0.081078,0.19006,0.148111,0.275327,0.06117,0.205874
1,0,0.085716,0.097333,0.08435,0.080826,0.085468,0.431893,0.211671,0.01597,0.235057,...,0.346394,0.285356,0.004351,0.262322,0.030147,0.420669,0.009445,0.009432,0.019266,0.303732
2,0,0.166374,0.10468,0.142032,0.10077,0.091655,0.526311,0.259209,0.023033,0.26356,...,0.065518,0.054112,0.05418,0.054289,0.113123,0.071198,0.060323,0.1146,0.050469,0.08084
3,1,0.811206,0.65484,0.788082,0.678479,0.605769,0.544119,0.264777,0.021816,0.260208,...,0.793092,0.797899,0.302903,0.726413,0.397784,0.527289,0.710305,0.528827,0.259022,0.847422
4,1,0.912067,0.922749,0.923697,0.91279,0.91822,0.432768,0.197861,0.024539,0.217964,...,0.929621,0.926892,0.904296,0.945235,0.869027,0.943073,0.889473,0.910398,0.819139,0.937626


In [17]:
# Score every vgg19 weight file on the holdout images, adding one
# preds_df column per file.
# The preprocessed holdout batch is the same for every file, so build it once.
va_img_proc = preprocess_input_vgg_resnet(hold_img.astype(np.float32))
# Conv features from the frozen base are also identical for every dense / rf
# head, so they are computed on first use and then reused across folds.
va_conv_feat = None

for weight_file in model_files:
    if 'history' in weight_file or 'base' in weight_file:
        continue
    if 'invasive_vgg19' not in weight_file:
        continue
    print(weight_file)
    if 'conv' in weight_file:
        # Conv fine-tuned weights: full base + head model, fed preprocessed images.
        model = make_vgg19_ft(hold_img[0].shape, grab_optimizer('sgd', 0.01))
        model.load_weights(weight_file)
        preds = model.predict(va_img_proc)
        del model
    else:
        if va_conv_feat is None:
            base_model = make_vgg19_conv(hold_img[0].shape)
            va_conv_feat = base_model.predict(va_img_proc)
            del base_model
            K.clear_session()
        if 'rf' in weight_file:
            rf = joblib.load(weight_file)
            # NOTE(review): predict() yields 0/1 values in the recorded output, not
            # probabilities; predict_proba may blend better - confirm before changing,
            # and keep the test-set cell consistent.
            preds = rf.predict(va_conv_feat.reshape((va_conv_feat.shape[0], -1)))
            del rf
        else:
            # The dense head takes the per-sample feature shape of the frozen base.
            dense_model = make_ft_dense(input_shape=tuple(va_conv_feat.shape[1:]), optimizer=grab_optimizer('sgd', 0.01))
            dense_model.load_weights(weight_file)
            preds = dense_model.predict(va_conv_feat)
            del dense_model
    preds_df[weight_file] = pd.Series(preds.flatten())
    # Release the Keras graph between weight files.
    K.clear_session()
    gc.collect()
preds_df.head()

/scratch/yns207/data_invasive/results/invasive_vgg19_conv_aug3_2.model
/scratch/yns207/data_invasive/results/invasive_vgg19_conv_aug3_4.model
/scratch/yns207/data_invasive/results/invasive_vgg19_aug3_0.model
/scratch/yns207/data_invasive/results/invasive_vgg19_aug3_4.model
/scratch/yns207/data_invasive/results/invasive_vgg19_aug3_2.model
/scratch/yns207/data_invasive/results/invasive_vgg19_aug3_1.model
/scratch/yns207/data_invasive/results/invasive_vgg19_conv_aug3_1.model
/scratch/yns207/data_invasive/results/invasive_vgg19_aug3_3.model
/scratch/yns207/data_invasive/results/invasive_vgg19_conv_aug3_0.model
/scratch/yns207/data_invasive/results/invasive_vgg19_conv_aug3_3.model
/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_0.pkl
/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_3.pkl
/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_1.pkl
/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_2.pkl
/scratch/yns207/data_invasive/results/invasive_

Unnamed: 0,true_label,/scratch/yns207/data_invasive/results/invasive_xception_aug3_0.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_2.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_4.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_1.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_3.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_3.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_0.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_1.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_4.model,...,/scratch/yns207/data_invasive/results/invasive_vgg19_aug3_1.model,/scratch/yns207/data_invasive/results/invasive_vgg19_conv_aug3_1.model,/scratch/yns207/data_invasive/results/invasive_vgg19_aug3_3.model,/scratch/yns207/data_invasive/results/invasive_vgg19_conv_aug3_0.model,/scratch/yns207/data_invasive/results/invasive_vgg19_conv_aug3_3.model,/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_0.pkl,/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_3.pkl,/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_1.pkl,/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_2.pkl,/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_4.pkl
0,0,0.46922,0.223394,0.335596,0.411062,0.496066,0.371322,0.14027,0.022839,0.206533,...,0.069307,0.327249,0.066538,0.259218,0.248361,0,0,0,0,0
1,0,0.085716,0.097333,0.08435,0.080826,0.085468,0.431893,0.211671,0.01597,0.235057,...,0.198275,0.476254,0.123544,0.31353,0.315375,0,0,0,0,0
2,0,0.166374,0.10468,0.142032,0.10077,0.091655,0.526311,0.259209,0.023033,0.26356,...,0.112519,0.245227,0.129059,0.168155,0.148507,0,0,0,0,0
3,1,0.811206,0.65484,0.788082,0.678479,0.605769,0.544119,0.264777,0.021816,0.260208,...,0.643835,0.919396,0.616146,0.912497,0.891011,1,1,1,1,1
4,1,0.912067,0.922749,0.923697,0.91279,0.91822,0.432768,0.197861,0.024539,0.217964,...,0.91771,0.899393,0.927121,0.931871,0.921927,1,1,1,1,1


In [21]:
# Sanity check: compare the preds_df shape with the number of weight files
# that the prediction loops do not skip (no 'history', no 'base').
print('preds v weight files:')
scorable_files = [wf for wf in model_files if 'history' not in wf and 'base' not in wf]
preds_df.shape, len(scorable_files)

preds v weight files:


((230, 51), 55)

In [22]:
# Score every custom conv net weight file on the holdout images, adding one
# preds_df column per file.
# The preprocessed holdout batch is the same for every file, so build it once.
va_img_proc = preprocess_input_vgg_resnet(hold_img.astype(np.float32))

for weight_file in model_files:
    if 'history' in weight_file or 'base' in weight_file:
        continue
    if 'invasive_custom_conv' not in weight_file:
        continue
    print(weight_file)
    model = make_conv_model(hold_img[0].shape, grab_optimizer('sgd', 0.01))
    model.load_weights(weight_file)
    preds = model.predict(va_img_proc)
    preds_df[weight_file] = pd.Series(preds.flatten())
    del model
    # Release the Keras graph between weight files.
    K.clear_session()
    gc.collect()
preds_df.head()

/scratch/yns207/data_invasive/results/invasive_custom_conv_aug4_2.model
/scratch/yns207/data_invasive/results/invasive_custom_conv_aug4_4.model
/scratch/yns207/data_invasive/results/invasive_custom_conv_aug4_0.model
/scratch/yns207/data_invasive/results/invasive_custom_conv_aug4_1.model
/scratch/yns207/data_invasive/results/invasive_custom_conv_aug4_3.model


Unnamed: 0,true_label,/scratch/yns207/data_invasive/results/invasive_xception_aug3_0.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_2.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_4.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_1.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_3.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_3.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_0.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_1.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_4.model,...,/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_0.pkl,/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_3.pkl,/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_1.pkl,/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_2.pkl,/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_4.pkl,/scratch/yns207/data_invasive/results/invasive_custom_conv_aug4_2.model,/scratch/yns207/data_invasive/results/invasive_custom_conv_aug4_4.model,/scratch/yns207/data_invasive/results/invasive_custom_conv_aug4_0.model,/scratch/yns207/data_invasive/results/invasive_custom_conv_aug4_1.model,/scratch/yns207/data_invasive/results/invasive_custom_conv_aug4_3.model
0,0,0.46922,0.223394,0.335596,0.411062,0.496066,0.371322,0.14027,0.022839,0.206533,...,0,0,0,0,0,0.538699,0.2388,0.462399,0.918738,0.535185
1,0,0.085716,0.097333,0.08435,0.080826,0.085468,0.431893,0.211671,0.01597,0.235057,...,0,0,0,0,0,0.158632,0.120844,0.063504,0.914742,0.278696
2,0,0.166374,0.10468,0.142032,0.10077,0.091655,0.526311,0.259209,0.023033,0.26356,...,0,0,0,0,0,0.27482,0.045885,0.164048,0.203533,0.322729
3,1,0.811206,0.65484,0.788082,0.678479,0.605769,0.544119,0.264777,0.021816,0.260208,...,1,1,1,1,1,0.797318,0.253356,0.803684,0.922963,0.897452
4,1,0.912067,0.922749,0.923697,0.91279,0.91822,0.432768,0.197861,0.024539,0.217964,...,1,1,1,1,1,0.921008,0.936997,0.925958,0.888957,0.925371


In [24]:
# stacking model that is fit on the per-model holdout predictions
def holdout_preds_model(input_shape, opt):
    """Build and compile the dense network used to blend model predictions.

    Three dense blocks (128, 256 and 1024 units, with dropout 0, 0.25 and
    0.5) feed a single sigmoid output unit. Compiled with binary
    cross-entropy loss and an accuracy metric.
    """
    inputs = Input(shape=input_shape)

    m = inputs
    for units, drop_prob in ((128, 0), (256, 0.25), (1024, 0.5)):
        m = dense_block(units, 'relu', drop_prob, inputs=m)
    outputs = dense_block(1, 'sigmoid', 0, inputs=m)

    stacker = Model(inputs=inputs, outputs=outputs)
    stacker.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
    return stacker

In [28]:
# Column 0 of preds_df is the true label; every other column holds one
# model's holdout predictions.
hold_train = preds_df.values[:,1:]
hold_labels = preds_df.values[:,0]

# Size the stacker input from the data instead of hard-coding the column
# count, so the cell keeps working when the set of weight files changes.
feat_model = holdout_preds_model((hold_train.shape[1],), grab_optimizer('adam', lr = 0.0025))
hist = feat_model.fit(x=hold_train, y=hold_labels,batch_size=64,epochs=20,verbose=2)
# NOTE: this AUC is computed on the same rows the stacker was just fit on, so
# it is an in-sample score and not an estimate of out-of-sample performance.
holdout_score = roc_auc_score(hold_labels, feat_model.predict(hold_train)[:, 0])
print('holdout_aroc: '+str(holdout_score))

Epoch 1/20
0s - loss: 0.5194 - acc: 0.8043
Epoch 2/20
0s - loss: 0.3600 - acc: 0.9522
Epoch 3/20
0s - loss: 0.3513 - acc: 0.9696
Epoch 4/20
0s - loss: 0.3481 - acc: 0.9739
Epoch 5/20
0s - loss: 0.3437 - acc: 0.9783
Epoch 6/20
0s - loss: 0.3431 - acc: 0.9783
Epoch 7/20
0s - loss: 0.3337 - acc: 0.9739
Epoch 8/20
0s - loss: 0.3267 - acc: 0.9739
Epoch 9/20
0s - loss: 0.3220 - acc: 0.9783
Epoch 10/20
0s - loss: 0.3177 - acc: 0.9783
Epoch 11/20
0s - loss: 0.3113 - acc: 0.9826
Epoch 12/20
0s - loss: 0.3077 - acc: 0.9870
Epoch 13/20
0s - loss: 0.3054 - acc: 0.9913
Epoch 14/20
0s - loss: 0.3017 - acc: 0.9826
Epoch 15/20
0s - loss: 0.2957 - acc: 0.9870
Epoch 16/20
0s - loss: 0.2930 - acc: 0.9957
Epoch 17/20
0s - loss: 0.2877 - acc: 0.9957
Epoch 18/20
0s - loss: 0.2900 - acc: 0.9913
Epoch 19/20
0s - loss: 0.2823 - acc: 0.9957
Epoch 20/20
0s - loss: 0.2807 - acc: 0.9957
holdout_aroc: 0.996190920398


In [45]:
# Target size for every test image.
img_height = 300
img_width = 400
# NOTE: this rebinds `path` from the results directory to the data root, so
# re-running the earlier %cd / glob cells after this one uses a different folder.
path = '/scratch/yns207/data_invasive/'
test_set = pd.read_csv(os.path.join(path,'sample_submission.csv'))
# Only the image names are needed here. The axis is passed by keyword because
# newer pandas versions reject the positional form drop('invasive', 1).
test_set = test_set.drop('invasive', axis=1)

def read_img(img_path, img_shape):
    img = misc.imread(img_path)
    img = misc.imresize(img, img_shape)
    return img

test_img = []
for img_path in tqdm(test_set['name'].iloc[:]):
    test_img.append(read_img(os.path.join(path, 'test', str(img_path)+'.jpg'), (img_height, img_width)))

test_img = np.array(test_img)
test_img.shape


  0%|          | 0/1531 [00:00<?, ?it/s][A
  0%|          | 2/1531 [00:00<01:24, 18.18it/s][A
  0%|          | 5/1531 [00:00<01:16, 19.84it/s][A
  1%|          | 8/1531 [00:00<01:10, 21.76it/s][A
  1%|          | 11/1531 [00:00<01:05, 23.29it/s][A
  1%|          | 14/1531 [00:00<01:01, 24.52it/s][A
  1%|          | 17/1531 [00:00<01:06, 22.89it/s][A
  1%|▏         | 20/1531 [00:00<01:02, 23.99it/s][A
  2%|▏         | 23/1531 [00:00<01:00, 24.96it/s][A
  2%|▏         | 26/1531 [00:01<00:58, 25.79it/s][A
  2%|▏         | 29/1531 [00:01<00:56, 26.46it/s][A
  2%|▏         | 33/1531 [00:01<00:54, 27.43it/s][A
  2%|▏         | 36/1531 [00:01<00:54, 27.68it/s][A
  3%|▎         | 39/1531 [00:01<00:56, 26.59it/s][A
  3%|▎         | 42/1531 [00:01<00:55, 26.96it/s][A
  3%|▎         | 45/1531 [00:01<00:53, 27.74it/s][A
  3%|▎         | 48/1531 [00:01<00:52, 28.10it/s][A
  3%|▎         | 51/1531 [00:01<00:52, 28.20it/s][A
  4%|▎         | 54/1531 [00:02<00:52, 28.01it/s][A
  4%

(1531, 300, 400, 3)

In [46]:
# one row per test image; prediction columns are appended by the cells below
test_preds = test_set[['name']].copy()

In [47]:
# the images every model will predict on
pred_data = test_img[:]
# per-image shape (everything after the sample axis)
pred_data.shape[1:]

(300, 400, 3)

In [49]:
# Predict the test set with every saved xception model and store each result
# as a column of test_preds keyed by the weight file path.
# The preprocessed images do not depend on the weight file, so they are
# computed once here instead of being re-cast and re-preprocessed per file.
va_img_proc = preprocess_input_incep_xcep(pred_data.astype(np.float32))
for weight_file in model_files:
    if 'history' in weight_file or 'base' in weight_file:
        continue
    if 'invasive_xception' in weight_file:
        print(weight_file)
        if 'conv' in weight_file:
            # '*_conv_*' weights belong to the full network built by
            # make_xception_ft, which predicts straight from the images
            model = make_xception_ft(pred_data[0].shape, grab_optimizer('sgd', 0.01))
            model.load_weights(weight_file)
            preds = model.predict(va_img_proc)
            del model
        else:
            # remaining files are heads that consume the conv-base features
            base_model = make_xception_conv(pred_data[0].shape)
            va_conv_feat = base_model.predict(va_img_proc)
            if 'rf' in weight_file:
                # pickled estimator: expects one flat feature row per image
                rf = joblib.load(weight_file)
                preds = rf.predict(va_conv_feat.reshape((va_conv_feat.shape[0], -1)))
                del rf
            else:
                dense_model = make_ft_dense(input_shape=tuple(base_model.output[0].shape.as_list()), optimizer=grab_optimizer('sgd', 0.01))
                dense_model.load_weights(weight_file)
                preds = dense_model.predict(va_conv_feat)
                del dense_model
            del base_model
        test_preds[weight_file] = pd.Series(preds.flatten())
    # release the Keras graph and Python garbage before the next model
    K.clear_session()
    gc.collect()

/scratch/yns207/data_invasive/results/invasive_xception_aug3_0.model
/scratch/yns207/data_invasive/results/invasive_xception_aug3_2.model
/scratch/yns207/data_invasive/results/invasive_xception_aug3_4.model
/scratch/yns207/data_invasive/results/invasive_xception_aug3_1.model
/scratch/yns207/data_invasive/results/invasive_xception_aug3_3.model
/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_3.model
/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_0.model
/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_1.model
/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_4.model
/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_2.model


In [50]:
# Predict the test set with every saved inceptionv3 model and store each
# result as a column of test_preds keyed by the weight file path.
# Preprocessing is independent of the weight file, so do it once up front
# rather than once per matching file.
va_img_proc = preprocess_input_incep_xcep(pred_data.astype(np.float32))
for weight_file in model_files:
    if 'history' in weight_file or 'base' in weight_file:
        continue
    if 'invasive_inceptionv3' in weight_file:
        print(weight_file)
        if 'conv' in weight_file:
            # full network built by make_incepv3_ft: images in, predictions out
            model = make_incepv3_ft(pred_data[0].shape, grab_optimizer('sgd', 0.01))
            model.load_weights(weight_file)
            preds = model.predict(va_img_proc)
            del model
        else:
            # heads that consume the conv-base features
            base_model = make_incepv3_conv(pred_data[0].shape)
            va_conv_feat = base_model.predict(va_img_proc)
            if 'rf' in weight_file:
                # pickled estimator: expects one flat feature row per image
                rf = joblib.load(weight_file)
                preds = rf.predict(va_conv_feat.reshape((va_conv_feat.shape[0], -1)))
                del rf
            else:
                dense_model = make_ft_dense(input_shape=tuple(base_model.output[0].shape.as_list()), optimizer=grab_optimizer('sgd', 0.01))
                dense_model.load_weights(weight_file)
                preds = dense_model.predict(va_conv_feat)
                del dense_model
            del base_model
        test_preds[weight_file] = pd.Series(preds.flatten())
    # release the Keras graph and Python garbage before the next model
    K.clear_session()
    gc.collect()

/scratch/yns207/data_invasive/results/invasive_inceptionv3_conv_aug3_4.model
/scratch/yns207/data_invasive/results/invasive_inceptionv3_aug3_4.model
/scratch/yns207/data_invasive/results/invasive_inceptionv3_aug3_1.model
/scratch/yns207/data_invasive/results/invasive_inceptionv3_aug3_3.model
/scratch/yns207/data_invasive/results/invasive_inceptionv3_conv_aug3_1.model
/scratch/yns207/data_invasive/results/invasive_inceptionv3_conv_aug3_3.model
/scratch/yns207/data_invasive/results/invasive_inceptionv3_conv_aug3_2.model
/scratch/yns207/data_invasive/results/invasive_inceptionv3_aug3_2.model
/scratch/yns207/data_invasive/results/invasive_inceptionv3_conv_aug3_0.model
/scratch/yns207/data_invasive/results/invasive_inceptionv3_aug3_0.model
/scratch/yns207/data_invasive/results/invasive_inceptionv3_rf_aug4_2.pkl
/scratch/yns207/data_invasive/results/invasive_inceptionv3_rf_aug4_4.pkl
/scratch/yns207/data_invasive/results/invasive_inceptionv3_rf_aug4_3.pkl
/scratch/yns207/data_invasive/result

In [51]:
# Predict the test set with every saved resnet50 model and store each result
# as a column of test_preds keyed by the weight file path.
# Preprocessing is independent of the weight file, so do it once up front
# rather than once per matching file.
va_img_proc = preprocess_input_vgg_resnet(pred_data.astype(np.float32))
for weight_file in model_files:
    if 'history' in weight_file or 'base' in weight_file:
        continue
    if 'invasive_resnet50' in weight_file:
        print(weight_file)
        if 'conv' in weight_file:
            # full network built by make_resnet50_ft: images in, predictions out
            model = make_resnet50_ft(pred_data[0].shape, grab_optimizer('sgd', 0.01))
            model.load_weights(weight_file)
            preds = model.predict(va_img_proc)
            del model
        else:
            # heads that consume the conv-base features
            base_model = make_resnet50_conv(pred_data[0].shape)
            va_conv_feat = base_model.predict(va_img_proc)
            if 'rf' in weight_file:
                # pickled estimator: expects one flat feature row per image
                rf = joblib.load(weight_file)
                preds = rf.predict(va_conv_feat.reshape((va_conv_feat.shape[0], -1)))
                del rf
            else:
                dense_model = make_ft_dense(input_shape=tuple(base_model.output[0].shape.as_list()), optimizer=grab_optimizer('sgd', 0.01))
                dense_model.load_weights(weight_file)
                preds = dense_model.predict(va_conv_feat)
                del dense_model
            del base_model
        test_preds[weight_file] = pd.Series(preds.flatten())
    # release the Keras graph and Python garbage before the next model
    K.clear_session()
    gc.collect()

/scratch/yns207/data_invasive/results/invasive_resnet50_aug3_1.model
/scratch/yns207/data_invasive/results/invasive_resnet50_aug3_2.model
/scratch/yns207/data_invasive/results/invasive_resnet50_conv_aug3_3.model
/scratch/yns207/data_invasive/results/invasive_resnet50_aug3_3.model
/scratch/yns207/data_invasive/results/invasive_resnet50_conv_aug3_0.model
/scratch/yns207/data_invasive/results/invasive_resnet50_aug3_0.model
/scratch/yns207/data_invasive/results/invasive_resnet50_conv_aug3_1.model
/scratch/yns207/data_invasive/results/invasive_resnet50_conv_aug3_2.model
/scratch/yns207/data_invasive/results/invasive_resnet50_conv_aug3_4.model
/scratch/yns207/data_invasive/results/invasive_resnet50_aug3_4.model


In [52]:
# Predict the test set with every saved vgg19 model and store each result
# as a column of test_preds keyed by the weight file path.
# Preprocessing is independent of the weight file, so do it once up front
# rather than once per matching file.
va_img_proc = preprocess_input_vgg_resnet(pred_data.astype(np.float32))
for weight_file in model_files:
    if 'history' in weight_file or 'base' in weight_file:
        continue
    if 'invasive_vgg19' in weight_file:
        print(weight_file)
        if 'conv' in weight_file:
            # full network built by make_vgg19_ft: images in, predictions out
            model = make_vgg19_ft(pred_data[0].shape, grab_optimizer('sgd', 0.01))
            model.load_weights(weight_file)
            preds = model.predict(va_img_proc)
            del model
        else:
            # heads that consume the conv-base features
            base_model = make_vgg19_conv(pred_data[0].shape)
            va_conv_feat = base_model.predict(va_img_proc)
            if 'rf' in weight_file:
                # pickled estimator: expects one flat feature row per image
                rf = joblib.load(weight_file)
                preds = rf.predict(va_conv_feat.reshape((va_conv_feat.shape[0], -1)))
                del rf
            else:
                dense_model = make_ft_dense(input_shape=tuple(base_model.output[0].shape.as_list()), optimizer=grab_optimizer('sgd', 0.01))
                dense_model.load_weights(weight_file)
                preds = dense_model.predict(va_conv_feat)
                del dense_model
            del base_model
        test_preds[weight_file] = pd.Series(preds.flatten())
    # release the Keras graph and Python garbage before the next model
    K.clear_session()
    gc.collect()

/scratch/yns207/data_invasive/results/invasive_vgg19_conv_aug3_2.model
/scratch/yns207/data_invasive/results/invasive_vgg19_conv_aug3_4.model
/scratch/yns207/data_invasive/results/invasive_vgg19_aug3_0.model
/scratch/yns207/data_invasive/results/invasive_vgg19_aug3_4.model
/scratch/yns207/data_invasive/results/invasive_vgg19_aug3_2.model
/scratch/yns207/data_invasive/results/invasive_vgg19_aug3_1.model
/scratch/yns207/data_invasive/results/invasive_vgg19_conv_aug3_1.model
/scratch/yns207/data_invasive/results/invasive_vgg19_aug3_3.model
/scratch/yns207/data_invasive/results/invasive_vgg19_conv_aug3_0.model
/scratch/yns207/data_invasive/results/invasive_vgg19_conv_aug3_3.model
/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_0.pkl
/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_3.pkl
/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_1.pkl
/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_2.pkl
/scratch/yns207/data_invasive/results/invasive_

In [53]:
# Predict the test set with every saved custom conv net and store each
# result as a column of test_preds keyed by the weight file path.
# Preprocessing is independent of the weight file, so do it once up front
# rather than once per matching file.
va_img_proc = preprocess_input_vgg_resnet(pred_data.astype(np.float32))
for weight_file in model_files:
    if 'history' in weight_file or 'base' in weight_file:
        continue
    if 'invasive_custom_conv' in weight_file:
        print(weight_file)
        model = make_conv_model(pred_data[0].shape, grab_optimizer('sgd', 0.01))
        model.load_weights(weight_file)
        preds = model.predict(va_img_proc)
        test_preds[weight_file] = pd.Series(preds.flatten())
        del model
    # release the Keras graph and Python garbage before the next model
    K.clear_session()
    gc.collect()

/scratch/yns207/data_invasive/results/invasive_custom_conv_aug4_2.model
/scratch/yns207/data_invasive/results/invasive_custom_conv_aug4_4.model
/scratch/yns207/data_invasive/results/invasive_custom_conv_aug4_0.model
/scratch/yns207/data_invasive/results/invasive_custom_conv_aug4_1.model
/scratch/yns207/data_invasive/results/invasive_custom_conv_aug4_3.model


In [54]:
# peek at the first rows of the collected per-model test predictions
test_preds.head(n=5)

Unnamed: 0,name,/scratch/yns207/data_invasive/results/invasive_xception_aug3_0.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_2.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_4.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_1.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_3.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_3.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_0.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_1.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_4.model,...,/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_0.pkl,/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_3.pkl,/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_1.pkl,/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_2.pkl,/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_4.pkl,/scratch/yns207/data_invasive/results/invasive_custom_conv_aug4_2.model,/scratch/yns207/data_invasive/results/invasive_custom_conv_aug4_4.model,/scratch/yns207/data_invasive/results/invasive_custom_conv_aug4_0.model,/scratch/yns207/data_invasive/results/invasive_custom_conv_aug4_1.model,/scratch/yns207/data_invasive/results/invasive_custom_conv_aug4_3.model
0,1,0.475886,0.362386,0.404198,0.319099,0.26783,0.421817,0.20079,0.017021,0.221065,...,1,1,1,1,1,0.87874,0.894349,0.913248,0.919591,0.915344
1,2,0.135472,0.130608,0.123282,0.093478,0.092063,0.478867,0.242152,0.019872,0.259043,...,0,0,0,0,0,0.402161,0.085883,0.534717,0.908381,0.242033
2,3,0.131044,0.104685,0.100378,0.115355,0.105781,0.579089,0.30369,0.023844,0.309664,...,0,0,0,0,0,0.136949,0.108558,0.179483,0.908819,0.259351
3,4,0.121735,0.128223,0.158176,0.125448,0.118463,0.464915,0.235118,0.021033,0.237109,...,0,0,0,0,0,0.213201,0.044814,0.283561,0.564429,0.093135
4,5,0.738732,0.845644,0.87884,0.773372,0.744316,0.379505,0.163826,0.012599,0.207037,...,1,1,1,1,1,0.914177,0.92523,0.895212,0.907326,0.932341


In [72]:
# had to rerun because i purged the keras session
# Column 0 of preds_df is used as the label, every other column as a
# feature for the blending model.
hold_train = preds_df.values[:,1:]
hold_labels = preds_df.values[:,0]

# Size the input layer from the feature matrix instead of hard-coding 55, so
# the cell keeps working when first-level models are added or removed.
feat_model = holdout_preds_model(hold_train.shape[1:], grab_optimizer('adam', lr = 0.0025))
hist = feat_model.fit(x=hold_train, y=hold_labels,batch_size=64,epochs=20,verbose=2)
# NOTE: this AUC is computed on the same rows the model was just fit on, so
# it is an in-sample score and will overstate real performance.
holdout_score = roc_auc_score(hold_labels, feat_model.predict(hold_train)[:, 0])
print('holdout_aroc: '+str(holdout_score))

Epoch 1/20
0s - loss: 0.4903 - acc: 0.8348
Epoch 2/20
0s - loss: 0.3631 - acc: 0.9522
Epoch 3/20
0s - loss: 0.3502 - acc: 0.9609
Epoch 4/20
0s - loss: 0.3507 - acc: 0.9652
Epoch 5/20
0s - loss: 0.3390 - acc: 0.9739
Epoch 6/20
0s - loss: 0.3316 - acc: 0.9739
Epoch 7/20
0s - loss: 0.3274 - acc: 0.9826
Epoch 8/20
0s - loss: 0.3243 - acc: 0.9783
Epoch 9/20
0s - loss: 0.3159 - acc: 0.9913
Epoch 10/20
0s - loss: 0.3266 - acc: 0.9739
Epoch 11/20
0s - loss: 0.3118 - acc: 0.9870
Epoch 12/20
0s - loss: 0.3082 - acc: 0.9826
Epoch 13/20
0s - loss: 0.3028 - acc: 0.9913
Epoch 14/20
0s - loss: 0.3009 - acc: 0.9913
Epoch 15/20
0s - loss: 0.2975 - acc: 0.9870
Epoch 16/20
0s - loss: 0.2903 - acc: 0.9957
Epoch 17/20
0s - loss: 0.2882 - acc: 0.9913
Epoch 18/20
0s - loss: 0.2860 - acc: 0.9913
Epoch 19/20
0s - loss: 0.2851 - acc: 0.9957
Epoch 20/20
0s - loss: 0.2859 - acc: 0.9957
holdout_aroc: 0.993858830846


In [73]:
# Drop column 0 ('name') so only the per-model prediction columns remain.
# NOTE(review): this relies on test_preds having its prediction columns in
# the same order as the feature columns of preds_df that feat_model was fit
# on -- confirm, since the selection here is purely positional.
test_inputs = test_preds.values[:, 1:]

In [74]:
# Assemble the submission: image name plus the blended invasive probability.
subm = pd.DataFrame([], columns=['name', 'invasive'])
subm['name'] = test_preds['name']
# predict() yields one column per output unit; take column 0 so a 1-D vector
# is assigned (same indexing as used when scoring the stacker).
subm['invasive'] = feat_model.predict(test_inputs)[:, 0]
subm.head()

Unnamed: 0,name,invasive
0,1,0.691088
1,2,0.444459
2,3,0.46344
3,4,0.487702
4,5,0.712306


In [75]:
# write the stacked-model submission as a gzip-compressed csv
subm.to_csv('/scratch/yns207/data_invasive/results/subm0_aug52017.gz', index=False, compression='gzip')

ok, that was awful. that submission scored about 0.979 (worse than my lb score).

# regular data ranking ensemble

In [77]:
# Work on an independent copy: `test_preds.iloc[:]` is only a slice of the
# original frame, and adding columns to a slice triggers pandas'
# SettingWithCopy warning.
rank_preds = test_preds.copy()
# try to rank avg instead:
# every prediction column (all but 'name') gets a companion 'r_<column>'
# holding the rank of each row within that model's predictions
for m in list(rank_preds.columns[1:]):
    rank_preds['r_'+m] = rankdata(rank_preds[m])
rank_preds.head()

Unnamed: 0,name,/scratch/yns207/data_invasive/results/invasive_xception_aug3_0.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_2.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_4.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_1.model,/scratch/yns207/data_invasive/results/invasive_xception_aug3_3.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_3.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_0.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_1.model,/scratch/yns207/data_invasive/results/invasive_xception_conv_aug4_4.model,...,r_/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_0.pkl,r_/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_3.pkl,r_/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_1.pkl,r_/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_2.pkl,r_/scratch/yns207/data_invasive/results/invasive_vgg19_rf_aug4_4.pkl,r_/scratch/yns207/data_invasive/results/invasive_custom_conv_aug4_2.model,r_/scratch/yns207/data_invasive/results/invasive_custom_conv_aug4_4.model,r_/scratch/yns207/data_invasive/results/invasive_custom_conv_aug4_0.model,r_/scratch/yns207/data_invasive/results/invasive_custom_conv_aug4_1.model,r_/scratch/yns207/data_invasive/results/invasive_custom_conv_aug4_3.model
0,1,0.475886,0.362386,0.404198,0.319099,0.26783,0.421817,0.20079,0.017021,0.221065,...,1182.5,1183.0,1185.0,1185.0,1185.0,1027.0,1108.0,1181.0,1051.0,927.0
1,2,0.135472,0.130608,0.123282,0.093478,0.092063,0.478867,0.242152,0.019872,0.259043,...,417.0,417.5,419.5,419.5,419.5,500.0,331.0,727.0,643.0,387.0
2,3,0.131044,0.104685,0.100378,0.115355,0.105781,0.579089,0.30369,0.023844,0.309664,...,417.0,417.5,419.5,419.5,419.5,79.0,405.0,244.0,650.0,402.0
3,4,0.121735,0.128223,0.158176,0.125448,0.118463,0.464915,0.235118,0.021033,0.237109,...,417.0,417.5,419.5,419.5,419.5,246.0,125.0,456.0,147.0,111.0
4,5,0.738732,0.845644,0.87884,0.773372,0.744316,0.379505,0.163826,0.012599,0.207037,...,1182.5,1183.0,1185.0,1185.0,1185.0,1226.0,1284.0,1100.0,607.0,1531.0


In [78]:
# Average the per-model rank columns and rescale the result to [0, 1].
# Select by the 'r_' prefix and skip 'r_avg': the previous substring test
# ('r_' in c) also matched 'r_avg' and 'final_r_blend', so re-running this
# cell would have folded its own outputs back into the average.
rank_columns = [c for c in rank_preds.columns if c.startswith('r_') and c != 'r_avg']
rank_preds['r_avg'] = rank_preds[rank_columns].mean(axis=1)
# MinMaxScaler needs a 2-D input; Series.reshape is deprecated, so reshape
# the underlying ndarray and flatten the scaled column back to 1-D.
rank_preds['final_r_blend'] = MinMaxScaler().fit_transform(rank_preds['r_avg'].values.reshape(-1, 1)).ravel()
rank_preds[['name', 'final_r_blend']].head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,name,final_r_blend
0,1,0.704802
1,2,0.279681
2,3,0.409456
3,4,0.347852
4,5,0.667946


In [81]:
# rank-average submission: keep the name and the blended score, exposing the
# score under the 'invasive' header
subm2 = rank_preds[['name', 'final_r_blend']].rename(columns={'final_r_blend': 'invasive'})
subm2.to_csv('/scratch/yns207/data_invasive/results/subm1_aug52017.gz', index=False, compression='gzip')

ok, that one scored about 0.973, which is even worse.

# trained 10 best models on holdout data ensemble

invasive_custom_conv_aug4_history.out:

kfold: 0best model train acc: 0.9757869249394673, loss: 0.13482560780233102best model valid acc: 0.9733656174334141, loss: 0.16035327470475766best model train aroc score: 0.996117444132567, valid aroc score: 0.9901970541401275
kfold: 1best model train acc: 0.9860774818401937, loss: 0.12428658598127434best model valid acc: 0.9806295399515739, loss: 0.1333482606791988best model train aroc score: 0.9974966190881607, valid aroc score: 0.9970256100020165
kfold: 2best model train acc: 0.9915254237288136, loss: 0.11236434651777762best model valid acc: 0.9782082324455206, loss: 0.15071743907131813best model train aroc score: 0.9996432884644608, valid aroc score: 0.9947350377118229
kfold: 3best model train acc: 0.9915254237288136, loss: 0.10931705079124857best model valid acc: 0.9636803876978434, loss: 0.1451920561726965best model train aroc score: 0.999697669829995, valid aroc score: 0.9956707802547771
kfold: 4best model train acc: 0.988498789346247, loss: 0.10196954318599609best model valid acc: 0.9661016949152542, loss: 0.14297106970309056best model train aroc score: 0.9996427794862237, valid aroc score: 0.9915374165062831

invasive_inceptionv3_aug3_history.out:

kfold: 0best model train acc: 1.0, loss: 0.10009470794882093best model valid acc: 0.9322033875213697, loss: 0.21509663987967928best model train aroc score: 1.0, valid aroc score: 0.9837643255950632
kfold: 1best model train acc: 1.0, loss: 0.09467029169237931best model valid acc: 0.9322033872327273, loss: 0.21086167545930526best model train aroc score: 1.0, valid aroc score: 0.9846681565407204
kfold: 2best model train acc: 1.0, loss: 0.08919046808385965best model valid acc: 0.9418886172569404, loss: 0.20869693196137362best model train aroc score: 1.0, valid aroc score: 0.9802651708312086
kfold: 3best model train acc: 1.0, loss: 0.09285220925946501best model valid acc: 0.9564164635921506, loss: 0.18566454806281638best model train aroc score: 1.0, valid aroc score: 0.9908910399602879
kfold: 4best model train acc: 1.0, loss: 0.09434089244999551best model valid acc: 0.9346246960376712, loss: 0.2113053193248213best model train aroc score: 1.0, valid aroc score: 0.9834322323163367

invasive_inceptionv3_conv_aug3_history.out:

kfold: 0best model train acc: 0.9552058111380145, loss: 0.1771012589371522best model valid acc: 0.9394673113384201, loss: 0.21649798657068617best model train aroc score: 0.991391509433962, valid aroc score: 0.9805873322438469
kfold: 1best model train acc: 0.9612590799031477, loss: 0.16973518127390605best model valid acc: 0.920096849702461, loss: 0.22452036607063422best model train aroc score: 0.9930429864253394, valid aroc score: 0.9755815785327941
kfold: 2best model train acc: 0.9600484262944422, loss: 0.17730320813292164best model valid acc: 0.9152542346903545, loss: 0.24201346439541685best model train aroc score: 0.992784077642946, valid aroc score: 0.9735460627895008
kfold: 3best model train acc: 0.9527845039206037, loss: 0.17344929440258203best model valid acc: 0.9539951573849879, loss: 0.183441172067536best model train aroc score: 0.9910314823603861, valid aroc score: 0.991559894244458
kfold: 4best model train acc: 0.948547215496368, loss: 0.18072455904962942best model valid acc: 0.9370460025334763, loss: 0.21844266171028193best model train aroc score: 0.9910823559408466, valid aroc score: 0.9792007131184074


invasive_vgg19_conv_aug3_history.out:

kfold: 0best model train acc: 0.9769975786924939, loss: 0.13639422866896914best model valid acc: 0.951573848580044, loss: 0.18544544997573187best model train aroc score: 0.9976094393897895, valid aroc score: 0.9842839036755386
kfold: 1best model train acc: 0.9763922518159807, loss: 0.15110584299731775best model valid acc: 0.9636803848114198, loss: 0.18682712287648826best model train aroc score: 0.9979088287379884, valid aroc score: 0.9872958257713249
kfold: 2best model train acc: 0.9782082324455206, loss: 0.14574118027098243best model valid acc: 0.9612590786042572, loss: 0.18444043816434846best model train aroc score: 0.9975558992572224, valid aroc score: 0.9914162022292995
kfold: 3best model train acc: 0.9788135593220338, loss: 0.14567435884591165best model valid acc: 0.9685230027099498, loss: 0.15886767728132428best model train aroc score: 0.9975355138769525, valid aroc score: 0.9954222290419927
kfold: 4best model train acc: 0.9812348668280871, loss: 0.13393465297851379best model valid acc: 0.9443099273607748, loss: 0.191848979085756best model train aroc score: 0.9979286169534736, valid aroc score: 0.9862840967575913

invasive_xception_aug3_history.out:
kfold: 0best model train acc: 1.0, loss: 0.10268585916076388best model valid acc: 0.9515738498789347, loss: 0.19207329283182037best model train aroc score: 1.0, valid aroc score: 0.9898892877003185
kfold: 1best model train acc: 1.0, loss: 0.09145904395539882best model valid acc: 0.9564164651796835, loss: 0.18050229971547394best model train aroc score: 1.0, valid aroc score: 0.9920564654960405
kfold: 2best model train acc: 1.0, loss: 0.09171273335634074best model valid acc: 0.9467312328463316, loss: 0.18840859458752462best model train aroc score: 1.0, valid aroc score: 0.9888395218449169
kfold: 3best model train acc: 1.0, loss: 0.08945062879211389best model valid acc: 0.9539951573849879, loss: 0.181670237395723best model train aroc score: 1.0, valid aroc score: 0.990855762594893
kfold: 4best model train acc: 1.0, loss: 0.09176910078698729best model valid acc: 0.9394673126373106, loss: 0.20025796340395116best model train aroc score: 1.0, valid aroc score: 0.983490319204605

invasive_xception_conv_aug4_history.out:
kfold: 0best model train acc: 0.9703389830508474, loss: 0.16442912809785284best model valid acc: 0.9443099247629937, loss: 0.20654018169165234best model train aroc score: 0.9955924695459579, valid aroc score: 0.9842383567215782
kfold: 1best model train acc: 0.9612590797588265, loss: 0.14880651618031554best model valid acc: 0.9539951576736302, loss: 0.1784397566433969best model train aroc score: 0.9958648536499728, valid aroc score: 0.988538893818397
kfold: 2best model train acc: 0.9691283291534997, loss: 0.14613818335764056best model valid acc: 0.9346246976252041, loss: 0.20118755040821093best model train aroc score: 0.9965938768446556, valid aroc score: 0.9834689194826868
kfold: 3best model train acc: 0.9685230024213075, loss: 0.14822689929106622best model valid acc: 0.951573848580044, loss: 0.17696324016078044best model train aroc score: 0.9959245347773032, valid aroc score: 0.9894223555888972
kfold: 4best model train acc: 0.965496368038741, loss: 0.14698459965697788best model valid acc: 0.9467312351554704, loss: 0.2055562578737014best model train aroc score: 0.9953666016697272, valid aroc score: 0.9801595470921256

the best models are (model folds):

custom_conv_aug4 0 ,1 ,2 ,3, 4 (done)

inceptionv3_aug3 3 (done)

inceptionv3_conv_aug3 3 (done)

vgg19_conv_aug3 2 ,3 (done)

xception_aug3 1 ,2 ,3 (done)

xception_conv_aug4 1 ,3 (done)

that's 14 models total. so now i'll open up the appropriate notebooks and do extra training for these on the 10% holdout data, then re-ensemble here once it's done.

# checkpoint 

In [84]:
# Load every test image listed in sample_submission.csv into one array.
img_height = 300
img_width = 400
path = '/scratch/yns207/data_invasive/'
test_set = pd.read_csv(os.path.join(path,'sample_submission.csv'))
# pass `axis` by keyword: the positional form is rejected by newer pandas
test_set = test_set.drop('invasive', axis=1)

def read_img(img_path, img_shape):
    """Read the image at ``img_path`` and resize it with ``misc.imresize``.

    img_shape is forwarded unchanged as the target size.
    NOTE(review): ``misc.imread`` / ``misc.imresize`` are deprecated in newer
    scipy releases; kept because no replacement is imported in this notebook.
    """
    img = misc.imread(img_path)
    img = misc.imresize(img, img_shape)
    return img

# image files are named <name>.jpg inside the 'test' directory
test_img = np.array([
    read_img(os.path.join(path, 'test', str(img_name)+'.jpg'), (img_height, img_width))
    for img_name in tqdm(test_set['name'])
])
test_img.shape

100%|██████████| 1531/1531 [01:39<00:00, 15.34it/s]


(1531, 300, 400, 3)

In [85]:
# fresh prediction table (one row per test image) and the images to predict on
test_preds = test_set[['name']].copy()
pred_data = test_img[:]

In [86]:
# All of the new model files can be found via the string '10p' in their
# name, which stands for the extra 10 percent of data they were trained on,
# so one glob pattern under `path` collects every one of them.
# NOTE(review): glob returns matches in arbitrary order, so the order of
# model_files (and of any columns later derived from it) is not guaranteed
# to be stable across runs.

model_files = glob.glob(os.path.join(path, '*10p*'))
model_files

['/scratch/yns207/data_invasive/invasive_inceptionv3_conv10p_aug3_3.model',
 '/scratch/yns207/data_invasive/invasive_inceptionv310p_aug6_1.model',
 '/scratch/yns207/data_invasive/invasive_custom_conv10p_aug6_0.model',
 '/scratch/yns207/data_invasive/invasive_xception10p_aug6_3.model',
 '/scratch/yns207/data_invasive/invasive_custom_conv10p_aug6_2.model',
 '/scratch/yns207/data_invasive/invasive_vgg19_conv10p_aug3_2.model',
 '/scratch/yns207/data_invasive/invasive_xception10p_aug6_2.model',
 '/scratch/yns207/data_invasive/invasive_xception_conv10p_aug6_3.model',
 '/scratch/yns207/data_invasive/invasive_inceptionv3_conv10p_aug3_0.model',
 '/scratch/yns207/data_invasive/invasive_xception10p_aug6_1.model',
 '/scratch/yns207/data_invasive/invasive_custom_conv10p_aug6_1.model',
 '/scratch/yns207/data_invasive/invasive_xception_conv10p_aug6_1.model',
 '/scratch/yns207/data_invasive/invasive_inceptionv310p_aug6_3.model',
 '/scratch/yns207/data_invasive/invasive_inceptionv310p_aug6_0.model',
 '

In [87]:
# Predict the test set with every vgg19 model in model_files and store each
# result as a column of test_preds keyed by the weight file path.
# Preprocessing is independent of the weight file, so do it once up front
# rather than once per matching file.
va_img_proc = preprocess_input_vgg_resnet(pred_data.astype(np.float32))
for weight_file in model_files:
    if 'history' in weight_file or 'base' in weight_file:
        continue
    if 'invasive_vgg19' in weight_file:
        print(weight_file)
        if 'conv' in weight_file:
            # full network built by make_vgg19_ft: images in, predictions out
            model = make_vgg19_ft(pred_data[0].shape, grab_optimizer('sgd', 0.01))
            model.load_weights(weight_file)
            preds = model.predict(va_img_proc)
            del model
        else:
            # heads that consume the conv-base features
            base_model = make_vgg19_conv(pred_data[0].shape)
            va_conv_feat = base_model.predict(va_img_proc)
            if 'rf' in weight_file:
                # pickled estimator: expects one flat feature row per image
                rf = joblib.load(weight_file)
                preds = rf.predict(va_conv_feat.reshape((va_conv_feat.shape[0], -1)))
                del rf
            else:
                dense_model = make_ft_dense(input_shape=tuple(base_model.output[0].shape.as_list()), optimizer=grab_optimizer('sgd', 0.01))
                dense_model.load_weights(weight_file)
                preds = dense_model.predict(va_conv_feat)
                del dense_model
            del base_model
        test_preds[weight_file] = pd.Series(preds.flatten())
    # release the Keras graph and Python garbage before the next model
    K.clear_session()
    gc.collect()

/scratch/yns207/data_invasive/invasive_vgg19_conv10p_aug3_2.model
/scratch/yns207/data_invasive/invasive_vgg19_conv10p_aug3_3.model


In [88]:
# Predict the test set with every xception model in model_files and store
# each result as a column of test_preds keyed by the weight file path.
# Preprocessing is independent of the weight file, so do it once up front
# rather than once per matching file.
va_img_proc = preprocess_input_incep_xcep(pred_data.astype(np.float32))
for weight_file in model_files:
    if 'history' in weight_file or 'base' in weight_file:
        continue
    if 'invasive_xception' in weight_file:
        print(weight_file)
        if 'conv' in weight_file:
            # full network built by make_xception_ft: images in, predictions out
            model = make_xception_ft(pred_data[0].shape, grab_optimizer('sgd', 0.01))
            model.load_weights(weight_file)
            preds = model.predict(va_img_proc)
            del model
        else:
            # heads that consume the conv-base features
            base_model = make_xception_conv(pred_data[0].shape)
            va_conv_feat = base_model.predict(va_img_proc)
            if 'rf' in weight_file:
                # pickled estimator: expects one flat feature row per image
                rf = joblib.load(weight_file)
                preds = rf.predict(va_conv_feat.reshape((va_conv_feat.shape[0], -1)))
                del rf
            else:
                dense_model = make_ft_dense(input_shape=tuple(base_model.output[0].shape.as_list()), optimizer=grab_optimizer('sgd', 0.01))
                dense_model.load_weights(weight_file)
                preds = dense_model.predict(va_conv_feat)
                del dense_model
            del base_model
        test_preds[weight_file] = pd.Series(preds.flatten())
    # release the Keras graph and Python garbage before the next model
    K.clear_session()
    gc.collect()

/scratch/yns207/data_invasive/invasive_xception10p_aug6_3.model
/scratch/yns207/data_invasive/invasive_xception10p_aug6_2.model
/scratch/yns207/data_invasive/invasive_xception_conv10p_aug6_3.model
/scratch/yns207/data_invasive/invasive_xception10p_aug6_1.model
/scratch/yns207/data_invasive/invasive_xception_conv10p_aug6_1.model


In [89]:
# Predict the test set with every inceptionv3 model in model_files and store
# each result as a column of test_preds keyed by the weight file path.
# Preprocessing is independent of the weight file, so do it once up front
# rather than once per matching file.
va_img_proc = preprocess_input_incep_xcep(pred_data.astype(np.float32))
for weight_file in model_files:
    if 'history' in weight_file or 'base' in weight_file:
        continue
    if 'invasive_inceptionv3' in weight_file:
        print(weight_file)
        if 'conv' in weight_file:
            # full network built by make_incepv3_ft: images in, predictions out
            model = make_incepv3_ft(pred_data[0].shape, grab_optimizer('sgd', 0.01))
            model.load_weights(weight_file)
            preds = model.predict(va_img_proc)
            del model
        else:
            # heads that consume the conv-base features
            base_model = make_incepv3_conv(pred_data[0].shape)
            va_conv_feat = base_model.predict(va_img_proc)
            if 'rf' in weight_file:
                # pickled estimator: expects one flat feature row per image
                rf = joblib.load(weight_file)
                preds = rf.predict(va_conv_feat.reshape((va_conv_feat.shape[0], -1)))
                del rf
            else:
                dense_model = make_ft_dense(input_shape=tuple(base_model.output[0].shape.as_list()), optimizer=grab_optimizer('sgd', 0.01))
                dense_model.load_weights(weight_file)
                preds = dense_model.predict(va_conv_feat)
                del dense_model
            del base_model
        test_preds[weight_file] = pd.Series(preds.flatten())
    # release the Keras graph and Python garbage before the next model
    K.clear_session()
    gc.collect()

/scratch/yns207/data_invasive/invasive_inceptionv3_conv10p_aug3_3.model
/scratch/yns207/data_invasive/invasive_inceptionv310p_aug6_1.model
/scratch/yns207/data_invasive/invasive_inceptionv3_conv10p_aug3_0.model
/scratch/yns207/data_invasive/invasive_inceptionv310p_aug6_3.model
/scratch/yns207/data_invasive/invasive_inceptionv310p_aug6_0.model


In [90]:
# Predict the test set with every saved custom conv-net checkpoint and store
# one column of predictions per weight file in `test_preds`.
#
# The preprocessed images depend only on `pred_data`, so they are built once
# (lazily, on the first matching file) rather than once per checkpoint.
va_img_proc = None

for weight_file in model_files:
    if 'history' in weight_file or 'base' in weight_file:
        continue
    if 'invasive_custom_conv' not in weight_file:
        # No model is built for other files, so there is nothing to clean up.
        continue
    print(weight_file)

    if va_img_proc is None:
        # astype() copies, so pred_data itself is never modified.
        va_img_proc = preprocess_input_vgg_resnet(pred_data.astype(np.float32))

    model = make_conv_model(pred_data[0].shape, grab_optimizer('sgd', 0.01))
    model.load_weights(weight_file)
    preds = model.predict(va_img_proc)
    test_preds[weight_file] = pd.Series(preds.flatten())
    del model

    # Drop the Keras graph/session before the next checkpoint is loaded.
    K.clear_session()
    gc.collect()

/scratch/yns207/data_invasive/invasive_custom_conv10p_aug6_0.model
/scratch/yns207/data_invasive/invasive_custom_conv10p_aug6_2.model
/scratch/yns207/data_invasive/invasive_custom_conv10p_aug6_1.model
/scratch/yns207/data_invasive/invasive_custom_conv10p_aug6_4.model
/scratch/yns207/data_invasive/invasive_custom_conv10p_aug6.model
/scratch/yns207/data_invasive/invasive_custom_conv10p_aug6_3.model


In [91]:
# Preview the first five rows: the id column plus one prediction column per model.
test_preds.head(n=5)

Unnamed: 0,name,/scratch/yns207/data_invasive/invasive_vgg19_conv10p_aug3_2.model,/scratch/yns207/data_invasive/invasive_vgg19_conv10p_aug3_3.model,/scratch/yns207/data_invasive/invasive_xception10p_aug6_3.model,/scratch/yns207/data_invasive/invasive_xception10p_aug6_2.model,/scratch/yns207/data_invasive/invasive_xception_conv10p_aug6_3.model,/scratch/yns207/data_invasive/invasive_xception10p_aug6_1.model,/scratch/yns207/data_invasive/invasive_xception_conv10p_aug6_1.model,/scratch/yns207/data_invasive/invasive_inceptionv3_conv10p_aug3_3.model,/scratch/yns207/data_invasive/invasive_inceptionv310p_aug6_1.model,/scratch/yns207/data_invasive/invasive_inceptionv3_conv10p_aug3_0.model,/scratch/yns207/data_invasive/invasive_inceptionv310p_aug6_3.model,/scratch/yns207/data_invasive/invasive_inceptionv310p_aug6_0.model,/scratch/yns207/data_invasive/invasive_custom_conv10p_aug6_0.model,/scratch/yns207/data_invasive/invasive_custom_conv10p_aug6_2.model,/scratch/yns207/data_invasive/invasive_custom_conv10p_aug6_1.model,/scratch/yns207/data_invasive/invasive_custom_conv10p_aug6_4.model,/scratch/yns207/data_invasive/invasive_custom_conv10p_aug6.model,/scratch/yns207/data_invasive/invasive_custom_conv10p_aug6_3.model
0,1,0.559387,0.713652,0.124987,0.179677,0.614659,0.20302,0.315271,0.678069,0.818297,0.002236,0.829146,0.876235,0.905965,0.895371,0.912033,0.879126,0.855016,0.925263
1,2,0.141231,0.144457,0.099855,0.118945,0.678426,0.13761,0.329773,0.760254,0.139466,0.002253,0.122214,0.162226,0.872573,0.671102,0.884801,0.08222,0.769964,0.211114
2,3,0.214203,0.184858,0.148639,0.141944,0.702451,0.232053,0.290118,0.779871,0.209064,0.004222,0.27418,0.176594,0.166939,0.361476,0.888086,0.097101,0.324576,0.237481
3,4,0.310388,0.298077,0.153456,0.186123,0.577072,0.199615,0.221742,0.716804,0.138568,0.002641,0.120486,0.20793,0.372269,0.321786,0.2212,0.042537,0.348414,0.064134
4,5,0.911136,0.93237,0.733913,0.627227,0.43252,0.753983,0.195898,0.339054,0.913801,0.001506,0.906202,0.938966,0.925987,0.912434,0.887588,0.920405,0.789111,0.949741


In [92]:
# now lets rank avg
rank_preds = test_preds.iloc[:]
# try to rank avg instead:
for m in rank_preds.columns[1:]:
    rank_preds['r_'+m] = rankdata(rank_preds[m])
    
rank_columns = [c for c in rank_preds.columns if 'r_' in c]
rank_preds['r_avg'] = rank_preds[rank_columns].mean(axis=1)
rank_preds['final_r_blend'] = MinMaxScaler().fit_transform(rank_preds['r_avg'].reshape(-1, 1))
rank_preds[['name', 'final_r_blend']].head()

  if __name__ == '__main__':


Unnamed: 0,name,final_r_blend
0,1,0.583764
1,2,0.349623
2,3,0.39922
3,4,0.251202
4,5,0.579045


In [94]:
# Keep only the id and the blended score, rename the score column to
# 'invasive', and write the result as a gzip-compressed CSV without the index.
subm = rank_preds[['name', 'final_r_blend']].rename(
    columns={'final_r_blend': 'invasive'})
subm_file = os.path.join(path, 'results', 'subm_aug6_0.gz')
subm.to_csv(subm_file, index=False, compression='gzip')

Awful: a score of 0.97.

In [99]:
# try ensembling just custom conv models:
rank_preds = test_preds[[c for c in test_preds.columns if 'custom_conv10p' in c or 'name' in c]].iloc[:]
# try to rank avg instead:
for m in rank_preds.columns[1:]:
    rank_preds['r_'+m] = rankdata(rank_preds[m])
    
rank_columns = [c for c in rank_preds.columns if 'r_' in c]
rank_preds['r_avg'] = rank_preds[rank_columns].mean(axis=1)
rank_preds['final_r_blend'] = MinMaxScaler().fit_transform(rank_preds['r_avg'].reshape(-1, 1))
subm = rank_preds[['name', 'final_r_blend']]
subm.columns = ['name', 'invasive']
subm.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


Unnamed: 0,name,invasive
0,1,0.707268
1,2,0.35535
2,3,0.171283
3,4,0.062027
4,5,0.767164


In [100]:
# Write the custom-conv-only blend as a gzip-compressed CSV without the index.
subm_file = os.path.join(path, 'results', 'subm_aug6_1.gz')
subm.to_csv(subm_file, index=False, compression='gzip')

Even worse: 0.867.

if you look carefully yo ucan see i forgot to pre