# setup 

will try:

resnet: https://ctmakro.github.io/site/on_learning/resnet_keras.html

simplenet: https://arxiv.org/pdf/1608.06037.pdf

leaky relu: https://keras.io/layers/advanced-activations/

this site has descriptions of voting ensembles, cool: https://mlwave.com/kaggle-ensembling-guide/

in combination with my convolutional ensemble from before.

In [None]:
# Ask TensorFlow which local devices it can see (presumably to confirm a GPU is visible before training).
from tensorflow.python.client import device_lib
device_lib.list_local_devices() 

In [None]:
import numpy as np
import pandas as pd

from scipy import ndimage
from scipy import misc

import os, gc, sys, glob
from tqdm import tqdm

from sklearn import model_selection
from sklearn import metrics

import keras
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Input, Activation, AveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.metrics import categorical_accuracy
from keras.preprocessing.image import ImageDataGenerator

from keras.layers import Input, GlobalAveragePooling2D, GlobalMaxPooling2D
from keras.layers.core import Flatten, Dense, Dropout, Lambda
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D

from keras.applications.resnet50 import ResNet50

from keras.callbacks import EarlyStopping, ModelCheckpoint

In [None]:
# Root directory holding the competition data on the cluster scratch disk.
DATA_DIR = os.path.join('/scratch', 'yns207', 'data_invasive')
path = DATA_DIR

# Sub-directories of the data root, all derived from the same base path.
train_path, valid_path, test_path, models_path = (
    os.path.join(path, sub) for sub in ('train', 'valid', 'test', 'results')
)

# Prefix used for every weight file written by this notebook.
model_name = 'model_jun25_kfold'

print(path)

```
%cd $DATA_DIR
!module load centos/7
!7za x '*.7z'
```

# defining funcs

In [None]:
def grab_optimizer(opt, lr):
    """Build a Keras optimizer from a short name.

    Args:
        opt: one of 'sgd', 'adam', 'adagrad' or 'rmsprop'.
        lr: learning rate handed to the optimizer constructor.

    Returns:
        The configured optimizer instance.

    Raises:
        ValueError: if `opt` is not one of the supported names. Previously the
            function fell through and returned None, which only surfaced
            later as a confusing failure inside `model.compile`.
    """
    if opt == 'sgd':
        # SGD is the only choice with extra hand-picked hyper-parameters.
        return optimizers.SGD(lr=lr, decay=1e-6, momentum=0.8, nesterov=True)
    if opt == 'adam':
        return optimizers.Adam(lr=lr)
    if opt == 'adagrad':
        return optimizers.Adagrad(lr=lr)
    if opt == 'rmsprop':
        return optimizers.RMSprop(lr=lr)
    raise ValueError(
        "unknown optimizer {!r}; expected one of 'sgd', 'adam', 'adagrad', 'rmsprop'".format(opt))

In [None]:
def dense_block(units, activation, drop_prob, inputs):
    """Apply batch-norm, a fully connected layer and dropout to `inputs`.

    `units` and `activation` configure the Dense layer; `drop_prob` is the
    dropout rate applied to its output. Returns the resulting tensor.
    """
    normed = BatchNormalization()(inputs)
    hidden = Dense(units, activation=activation)(normed)
    return Dropout(drop_prob)(hidden)

In [None]:
# My attempt at a resnet-style residual block (no separate "conv block" variant).
def resnet_block(filter_depth, filter_size, pool_size, activation, inputs):
    """Residual block: a three-convolution main path added to a shortcut.

    Main path: BN -> 1x1 conv -> BN -> `filter_size` conv ('same' padding)
    -> BN -> 1x1 conv (no activation). The shortcut sends `inputs` through a
    1x1 conv and BN so both paths have `filter_depth` channels. The sum is
    passed through a ReLU.

    `pool_size` is accepted for signature compatibility but is not used.
    """
    # main path, bottleneck-style: 1x1 -> filter_size -> 1x1
    main = BatchNormalization()(inputs)
    main = Convolution2D(filter_depth, (1,1), activation=activation)(main)
    main = BatchNormalization()(main)
    main = Convolution2D(filter_depth, filter_size, activation=activation, padding='same')(main)
    main = BatchNormalization()(main)
    main = Convolution2D(filter_depth, (1,1))(main)

    # shortcut path, projected to the same channel count as the main path
    skip = Convolution2D(filter_depth, (1,1))(inputs)
    skip = BatchNormalization()(skip)

    merged = keras.layers.add([main, skip])
    return Activation('relu')(merged)

In [None]:
def make_model(input_shape, optimizer):
    """Build and compile the small residual binary classifier.

    Layout: zero-padding, a strided 3x3 conv (16 filters) and max-pool stem;
    three residual blocks with 32, 64 and 128 filters; 7x7 average pooling;
    then dense blocks of 2048 and 512 units and a single sigmoid output.
    The model is compiled with binary cross-entropy and an accuracy metric.
    """
    image_in = Input(shape=input_shape)

    # stem
    net = ZeroPadding2D((3,3))(image_in)
    net = Convolution2D(16, (3,3), strides=(2,2), activation='relu')(net)
    net = MaxPooling2D((3,3), strides=(2,2))(net)

    # residual stages with doubling filter depth
    for depth in (32, 64, 128):
        net = resnet_block(depth, (3,3), (2,2), 'relu', inputs=net)

    net = AveragePooling2D((7, 7))(net)
    net = Flatten()(net)

    # classifier head
    net = dense_block(2048, 'relu', 0.25, inputs=net)
    net = dense_block(512, 'relu', 0.5, inputs=net)
    prediction = dense_block(1, 'sigmoid', 0, inputs=net)

    network = Model(inputs=image_in, outputs=prediction)
    network.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return network

In [None]:
def make_preds(model, test_data):
    """Predict on `test_data` and return a copy of `test_set` with the scores.

    The flattened predictions are written to the 'invasive' column of a copy
    of the module-level `test_set` frame, so row order must match `test_set`.
    """
    # Historical note worth keeping:
    # predictions 1, 2 and 3 from jun21 had misaligned test names, and
    # submission files 3 and 4 for jun21 are actually both submission 4 (accident).
    scores = model.predict(test_data).flatten()
    submission = test_set.copy()
    submission['invasive'] = scores
    return submission

In [None]:
# Training labels, plus the sample submission whose 'name' column lists the test images
# (read_imgs iterates over it and make_preds copies it as the submission template).
train_set = pd.read_csv(os.path.join(path, 'train_labels.csv'))
test_set = pd.read_csv(os.path.join(path, 'sample_submission.csv'))

In [None]:
# peek at the first two label rows
train_set.head(2)

In [None]:
# img_shape should be a tuple: (H, W), i.e. (height, width)
def read_img(img_path, img_shape):
    """Read the image at `img_path` and return it resized to `img_shape`."""
    # NOTE(review): scipy.misc.imread/imresize only exist in old SciPy releases.
    return misc.imresize(misc.imread(img_path), img_shape)

In [None]:
def read_imgs(img_height, img_width):
    """Load every train and test image at the requested size.

    Image names come from the 'name' columns of the module-level `train_set`
    and `test_set`; files are read from the 'train' and 'test' folders under
    `path`. Pixel values are converted to float32 and divided by 255.

    Returns:
        (train_img, test_img, train_label), where train_label is the
        'invasive' column of `train_set` as an array.
    """
    target_shape = (img_height, img_width)

    train_img = [
        read_img(os.path.join(path, 'train', str(name)+'.jpg'), target_shape)
        for name in tqdm(train_set['name'].iloc[:])
    ]
    test_img = [
        read_img(os.path.join(path, 'test', str(name)+'.jpg'), target_shape)
        for name in tqdm(test_set['name'].iloc[:])
    ]

    # scale pixel values by 1/255
    train_img = np.array(train_img, np.float32)/255
    test_img = np.array(test_img, np.float32)/255
    train_label = np.array(train_set['invasive'].iloc[:])

    return train_img, test_img, train_label

In [None]:
# load all images resized to height 300, width 400
train_img, test_img, train_label = read_imgs(300,400)

In [None]:
# Sanity check: display one training image.
# train_img was already loaded at (300, 400) and scaled by 1/255 in read_imgs.
import matplotlib.pyplot as plt
plt.imshow(misc.imresize(train_img[124], (300,400)))
plt.show()

In [None]:
%cd $DATA_DIR
def train_model_k_folds(model, train_data, train_label, model_out,  model_init_weights, epochs, kfolds,
                        batch_size=32, out_dir='/scratch/yns207/data_invasive', random_state=None):
    """Train `model` once per K-fold split and collect per-fold statistics.

    For every fold the model is reset to the weights stored in
    `model_init_weights`, trained on augmented batches with early stopping,
    and the checkpoint with the lowest validation loss is reloaded before
    the fold is evaluated.

    Args:
        model: compiled Keras model; the same instance is reused for all folds.
        train_data: array of training images, indexed by the fold indices.
        train_label: array of labels aligned with `train_data`.
        model_out: file-name prefix for the per-fold checkpoints.
        model_init_weights: path of the weight file used to reset the model.
        epochs: maximum number of epochs per fold.
        kfolds: number of folds.
        batch_size: mini-batch size used by the augmenting generator.
        out_dir: directory the per-fold checkpoints are written to.
        random_state: optional seed for the fold shuffle; None keeps the
            original non-deterministic split.

    Returns:
        dict keyed by checkpoint path. Each value holds 'score_tr_va'
        ([train AUC, valid AUC]), 'train_acc_loss' and 'val_acc_loss'
        ([second, first] entry of `model.evaluate`; assumes the model was
        compiled with a single accuracy metric).
    """
    kf = model_selection.KFold(n_splits=kfolds, shuffle=True, random_state=random_state)
    score_func = metrics.roc_auc_score

    # The augmentation settings are the same for every fold, so build them once.
    gen = ImageDataGenerator(
        rotation_range = 30,
        width_shift_range = 0.2,
        height_shift_range = 0.2,
        shear_range = 0.2,
        zoom_range = 0.2,
        horizontal_flip = True,
        vertical_flip = True,
        fill_mode = 'nearest')

    models_stats = {}
    for i, (train_ixs, valid_ixs) in enumerate(kf.split(train_data)):
        x_train = train_data[train_ixs]
        x_valid = train_data[valid_ixs]
        y_train = train_label[train_ixs]
        y_valid = train_label[valid_ixs]

        # re-initialize the weights of the model on each run
        model.load_weights(model_init_weights)
        model_out_file = os.path.join(out_dir, '{}_{}.model'.format(model_out, str(i)))
        # callbacks carry state (best loss, patience counter), so they are rebuilt per fold
        model_checkpoint = ModelCheckpoint(model_out_file,
                                           monitor='val_loss',
                                           save_best_only=True)
        early_stopping = EarlyStopping(monitor='val_loss', patience=25, verbose=1, mode='auto')

        # Ceiling division: one pass over the fold per epoch. The previous
        # `len // batch_size + 1` ran an extra batch whenever the fold size was
        # an exact multiple of batch_size.
        steps_per_epoch = -(-len(x_train) // batch_size)

        # validation_steps is not passed: the validation data is a plain
        # (x, y) tuple rather than a generator.
        model.fit_generator(gen.flow(x_train, y_train, batch_size=batch_size),
                            steps_per_epoch=steps_per_epoch,
                            validation_data=(x_valid, y_valid),
                            epochs=epochs,
                            verbose=1,
                            callbacks=[early_stopping, model_checkpoint])

        # evaluate the best checkpoint, not the weights from the last epoch
        model.load_weights(model_out_file)

        eval_tr = model.evaluate(x_train, y_train)
        eval_va = model.evaluate(x_valid, y_valid)

        tr_score = score_func(y_train, model.predict(x_train)[:, 0])
        va_score = score_func(y_valid, model.predict(x_valid)[:, 0])

        print('\n')
        print('kfold: {}'.format(str(i)))
        print('best model train acc: {}, loss: {}'.format(eval_tr[1], eval_tr[0]))
        print('best model valid acc: {}, loss: {}'.format(eval_va[1], eval_va[0]))
        print('best model train aroc score: {}, valid aroc score: {}'.format(tr_score, va_score))
        print('\n')
        models_stats[model_out_file] = {
            'score_tr_va': [tr_score, va_score],
            'train_acc_loss': [eval_tr[1], eval_tr[0]],
            'val_acc_loss': [eval_va[1], eval_va[0]],
        }

    return models_stats

In [None]:
# build the residual model for 300x400 RGB inputs with Adam at lr=0.00025, then print its layers
model = make_model((300,400,3), grab_optimizer('adam', 0.00025))
model.summary()

In [None]:
# save the initial weights so every fold starts from the same initialization
model.save_weights('{}_base.model'.format(model_name))
# run one 10-fold cross-validation, with at most 100 epochs per fold
models_stats = train_model_k_folds(model, train_img, train_label, model_name, '{}_base.model'.format(model_name), 100, 10)

In [None]:
# show the per-fold statistics returned by train_model_k_folds
models_stats

OK, this doesn't look so bad. Let's do what we did before: take 3-4 good models and make an ensemble.


model_jun25_kfold_7.model

{'score_tr_va': [0.99910285359237438,0.99650904474769919],
  'train_acc_loss': [0.98209099709583736, 0.055329413934892976],
  'val_acc_loss': [0.95196506576246565, 0.088502192666436913]}

model_jun25_kfold_6.model

{'score_tr_va': [0.99870266114630291,
   0.99662337662337663],
  'train_acc_loss': [0.98886737634228594, 0.043189480468010508],
  'val_acc_loss': [0.96506550218340614, 0.072713479034167486]}

model_jun25_kfold_5.model

{'score_tr_va': [0.9972994601947901,
   0.99728217426059151],
  'train_acc_loss': [0.97967086156824779, 0.063989531331752419],
  'val_acc_loss': [0.95633187772925765, 0.083368520903112323]}

model_jun25_kfold_4.model

{'score_tr_va': [0.99884112767346811,
   0.99765826873385011],
  'train_acc_loss': [0.98595641646489107, 0.045430601223148674],
  'val_acc_loss': [0.97826086956521741, 0.057762287557125092]}
  
model_jun25_kfold_3.model

{'score_tr_va': [0.99880816487589419,
   0.9979615133724723],
  'train_acc_loss': [0.98111380145278448, 0.056782990755382519],
  'val_acc_loss': [0.97391304347826091, 0.080385985452195871]}

model_jun25_kfold_1.model

{'score_tr_va': [0.99906311787072244,
   0.9931013099759709],
  'train_acc_loss': [0.98595641646489107, 0.044911352720012385],
  'val_acc_loss': [0.9652173913043478, 0.096299015989770059]}


In [None]:
# Start the submission frame from the test set without its placeholder 'invasive' column.
# `axis` is passed by keyword: current pandas rejects the old positional form drop(label, 1).
subm = test_set.drop('invasive', axis=1)

%cd $DATA_DIR
# fold indices of the checkpoints chosen for the ensemble
models = [1,3,4,5,6,7]
# one model instance is reused; only its weights change per fold
model = make_model((300,400,3), grab_optimizer('adam', 0.00025))

# add one prediction column per selected fold, named after the fold index
for f in models:
    model.load_weights('{}_{}.model'.format(model_name, str(f)))
    subm[str(f)] = make_preds(model, test_img)['invasive']
subm.head()

In [None]:
# Plain average of the per-fold predictions, then drop the per-fold columns.
model_cols = [str(f) for f in models]
subm['invasive'] = subm[model_cols].mean(axis=1)
# `axis` is passed by keyword: current pandas rejects the old positional form drop(labels, 1).
subm = subm.drop(model_cols, axis=1)
subm.head()

In [None]:
# write the averaged submission as a gzip-compressed CSV, without the index column
subm.to_csv(os.path.join(DATA_DIR, 'results', 'subm_jun_27_17_0.gz'), index=False, compression='gzip')

That scored 0.984 (less than my current 0.985). Maybe if we jack it up with another layer we'll improve the score, or we could try rank averaging.

In [None]:
from scipy.stats import rankdata
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Start a fresh submission frame from the test set without its placeholder 'invasive' column.
# `axis` is passed by keyword: current pandas rejects the old positional form drop(label, 1).
subm = test_set.drop('invasive', axis=1)

%cd $DATA_DIR
# same fold selection as for the mean blend; the per-fold columns are rebuilt for rank averaging
models = [1,3,4,5,6,7]
model = make_model((300,400,3), grab_optimizer('adam', 0.00025))

# add one prediction column per selected fold, named after the fold index
for f in models:
    model.load_weights('{}_{}.model'.format(model_name, str(f)))
    subm[str(f)] = make_preds(model, test_img)['invasive']
subm.head()

In [None]:
# convert each fold's predictions to ranks, stored in columns 'r<fold>'
for f in models:
    subm['r'+str(f)] = rankdata(subm[str(f)])
subm.head()

In [None]:
# Average the rank columns. The names are derived from `models` rather than
# hard-coded, so this stays in sync with the rank loop if the selection changes.
subm['r_avg'] = subm[['r'+str(f) for f in models]].mean(axis=1)
subm.head()

In [None]:
# Rescale the averaged ranks to [0, 1]. MinMaxScaler needs a 2-D array, so reshape the
# underlying values (Series.reshape is not available in current pandas) and flatten the
# (n, 1) result back to one dimension before storing it as a column.
subm['final_r_blend'] = MinMaxScaler().fit_transform(subm['r_avg'].values.reshape(-1, 1)).ravel()

In [None]:
# inspect the blended frame
subm.head()

In [None]:
# Keep only the image name and the blended score, and rename the score column to
# 'invasive', the name used by the sample submission. The later ensemble cell
# already does this; without the rename the written file has the wrong header.
subm = subm[['name', 'final_r_blend']]
subm.columns = ['name', 'invasive']
subm.head()

In [None]:
# write the rank-averaged submission as a gzip-compressed CSV, without the index column
subm.to_csv(os.path.join(DATA_DIR, 'results', 'subm_jun_27_17_1.gz'), index=False, compression='gzip')

OK, that had the exact same 0.984 score. LOL, that's because the submission NEVER WENT THROUGH: I accidentally submitted the older file, my mistake. If it had gone through, it would have thrown an error, because the score column was not named `invasive`.

In [None]:
%cd $DATA_DIR
# Reload the test images at the two sizes the earlier models were trained on; only the
# test arrays are kept. Each call also re-reads every training image, because read_imgs
# always loads both sets.
# NOTE(review): test_img_22 is not referenced again in the visible part of the notebook.
_, test_img_22, _ = read_imgs(128,128)
_, test_img_23_24, _ = read_imgs(300,400)

In [None]:
# lets try one more ensemble with 3 best conv models, the 3 best resnet models

# Start a fresh submission frame from the test set without its placeholder 'invasive' column.
# `axis` is passed by keyword: current pandas rejects the old positional form drop(label, 1).
subm = test_set.drop('invasive', axis=1)

# weight files for the combined ensemble:
#   - the 3 best conv models (jun23 run)
#   - the best resnet models (jun25 run), omitting fold 1
models = [
        'model_jun23_kfold_3.model',
        'model_jun23_kfold_7.model',
        'model_jun23_kfold_9.model',
        'model_jun25_kfold_3.model',
        'model_jun25_kfold_4.model',
        'model_jun25_kfold_5.model',
        'model_jun25_kfold_6.model',
        'model_jun25_kfold_7.model'
        ]

In [None]:
def conv_block(filter_depth, filter_size, pool_size, activation, inputs):
    """Apply batch-norm, a convolution and max-pooling to `inputs`.

    `filter_depth`, `filter_size` and `activation` configure the convolution;
    `pool_size` configures the pooling. Returns the pooled tensor.
    """
    normed = BatchNormalization()(inputs)
    features = Convolution2D(filter_depth, filter_size, activation=activation)(normed)
    return MaxPooling2D(pool_size=pool_size)(features)

def dense_block(units, activation, drop_prob, inputs):
    """Apply batch-norm, a fully connected layer and dropout to `inputs`.

    `units` and `activation` configure the Dense layer; `drop_prob` is the
    dropout rate applied to its output. Returns the resulting tensor.
    """
    normed = BatchNormalization()(inputs)
    hidden = Dense(units, activation=activation)(normed)
    return Dropout(drop_prob)(hidden)

def make_model_23(input_shape, optimizer):
    """Build and compile the five-stage plain conv net (the jun23 architecture).

    Five conv blocks with 16, 32, 64, 128 and 256 filters, then dense blocks
    of 2048 units (dropout 0.25) and 512 units (dropout 0.5), and a single
    sigmoid output. Compiled with binary cross-entropy and an accuracy metric.
    """
    image_in = Input(shape=input_shape)

    # conv stages: filter count doubles at every stage
    net = image_in
    for depth in (16, 32, 64, 128, 256):
        net = conv_block(depth, (3,3), (2,2), 'relu', inputs=net)

    net = Flatten()(net)

    # classifier head
    net = dense_block(2048, 'relu', 0.25, inputs=net)
    net = dense_block(512, 'relu', 0.5, inputs=net)
    prediction = dense_block(1, 'sigmoid', 0, inputs=net)

    network = Model(inputs=image_in, outputs=prediction)
    network.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return network

def conv_block(filter_depth, filter_size, pool_size, activation, inputs):
    """Batch-norm -> convolution -> max-pool, applied in that order to `inputs`."""
    out = BatchNormalization()(inputs)
    out = Convolution2D(filter_depth, filter_size, activation=activation)(out)
    out = MaxPooling2D(pool_size=pool_size)(out)
    return out

def dense_block(units, activation, drop_prob, inputs):
    """Batch-norm -> Dense(units, activation) -> Dropout(drop_prob), applied to `inputs`."""
    out = BatchNormalization()(inputs)
    out = Dense(units, activation=activation)(out)
    out = Dropout(drop_prob)(out)
    return out

def make_model_22(input_shape, optimizer):
    """Build and compile the four-stage plain conv net (the jun22 architecture).

    Four conv blocks with 16, 32, 64 and 128 filters, then dense blocks of
    2048 units (dropout 0.55) and 512 units (dropout 0.65), and a single
    sigmoid output. Compiled with binary cross-entropy and an accuracy metric.
    """
    image_in = Input(shape=input_shape)

    # conv stages: filter count doubles at every stage
    net = image_in
    for depth in (16, 32, 64, 128):
        net = conv_block(depth, (3,3), (2,2), 'relu', inputs=net)

    net = Flatten()(net)

    # classifier head, with heavier dropout than the jun23 variant
    net = dense_block(2048, 'relu', 0.55, inputs=net)
    net = dense_block(512, 'relu', 0.65, inputs=net)
    prediction = dense_block(1, 'sigmoid', 0, inputs=net)

    network = Model(inputs=image_in, outputs=prediction)
    network.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return network

def dense_block(units, activation, drop_prob, inputs):
    """Normalise `inputs`, apply a Dense layer, then dropout; return the result."""
    bn_out = BatchNormalization()(inputs)
    fc_out = Dense(units, activation=activation)(bn_out)
    dropped = Dropout(drop_prob)(fc_out)
    return dropped

# My attempt at a resnet-style residual block (no separate "conv block" variant).
def resnet_block(filter_depth, filter_size, pool_size, activation, inputs):
    """Residual block: a three-convolution main path added to a shortcut.

    Main path: BN -> 1x1 conv -> BN -> `filter_size` conv ('same' padding)
    -> BN -> 1x1 conv (no activation). The shortcut sends `inputs` through a
    1x1 conv and BN so both paths have `filter_depth` channels. The sum is
    passed through a ReLU.

    `pool_size` is accepted for signature compatibility but is not used.
    """
    # main path, bottleneck-style: 1x1 -> filter_size -> 1x1
    main = BatchNormalization()(inputs)
    main = Convolution2D(filter_depth, (1,1), activation=activation)(main)
    main = BatchNormalization()(main)
    main = Convolution2D(filter_depth, filter_size, activation=activation, padding='same')(main)
    main = BatchNormalization()(main)
    main = Convolution2D(filter_depth, (1,1))(main)

    # shortcut path, projected to the same channel count as the main path
    skip = Convolution2D(filter_depth, (1,1))(inputs)
    skip = BatchNormalization()(skip)

    merged = keras.layers.add([main, skip])
    return Activation('relu')(merged)

def make_model_25(input_shape, optimizer):
    """Build and compile the small residual net (the jun25 architecture).

    Layout: zero-padding, a strided 3x3 conv (16 filters) and max-pool stem;
    three residual blocks with 32, 64 and 128 filters; 7x7 average pooling;
    then dense blocks of 2048 and 512 units and a single sigmoid output.
    Compiled with binary cross-entropy and an accuracy metric.
    """
    image_in = Input(shape=input_shape)

    # stem
    net = ZeroPadding2D((3,3))(image_in)
    net = Convolution2D(16, (3,3), strides=(2,2), activation='relu')(net)
    net = MaxPooling2D((3,3), strides=(2,2))(net)

    # residual stages with doubling filter depth
    for depth in (32, 64, 128):
        net = resnet_block(depth, (3,3), (2,2), 'relu', inputs=net)

    net = AveragePooling2D((7, 7))(net)
    net = Flatten()(net)

    # classifier head
    net = dense_block(2048, 'relu', 0.25, inputs=net)
    net = dense_block(512, 'relu', 0.5, inputs=net)
    prediction = dense_block(1, 'sigmoid', 0, inputs=net)

    network = Model(inputs=image_in, outputs=prediction)
    network.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return network

# model 24 is omitted because it was just a poorly performing resnet50
model_23 = make_model_23((300,400,3), grab_optimizer('adam', 0.000125))
model_25 = make_model_25((300,400,3), grab_optimizer('adam', 0.0005))

# One prediction column per weight file; the file name decides which architecture
# the weights are loaded into. Both architectures use the 300x400 test images.
for f in models:
    model_test = model_23 if 'jun23' in f else model_25
    model_test.load_weights(f)
    subm[str(f)] = make_preds(model_test, test_img_23_24)['invasive']
subm.head()

In [None]:
# convert each model's predictions to ranks, stored in columns 'r_<weight file>'
for f in models:
    subm['r_'+str(f)] = rankdata(subm[str(f)])
subm.head()

In [None]:
# average the rank columns across all ensemble members
subm['r_avg'] = subm[['r_'+f for f in models]].mean(axis=1)
subm.head()

In [None]:
# Rescale the averaged ranks to [0, 1]. MinMaxScaler needs a 2-D array, so reshape the
# underlying values (Series.reshape is not available in current pandas) and flatten the
# (n, 1) result back to one dimension before storing it as a column.
subm['final_r_blend'] = MinMaxScaler().fit_transform(subm['r_avg'].values.reshape(-1, 1)).ravel()

In [None]:
# inspect the blended frame
subm.head()

In [None]:
# keep only the image name and the blended score, renamed to 'invasive' for the submission header
subm = subm[['name', 'final_r_blend']]
subm.columns = ['name', 'invasive']
subm.head()

In [None]:
# write the combined-ensemble submission as a gzip-compressed CSV, without the index column
subm.to_csv(os.path.join(DATA_DIR, 'results', 'subm_jun_27_17_2.gz'), index=False, compression='gzip')

alright well that rank average did improve my score, 0.98526 -> 0.98560, this ensemble is maybe worth building off of.

# summary

http://blog.kaggle.com/2017/04/20/dogs-vs-cats-redux-playground-competition-3rd-place-interview-marco-lugo/

https://gogul09.github.io/software/flower-recognition

good overview of voting/rank averaging ensembles:

http://andremeetsdata.com/2015/12/30/Numerai-Averaging

https://mlwave.com/kaggle-ensembling-guide/

https://www.kaggle.com/c/homesite-quote-conversion/discussion/18067

Giving extra digits after the decimal point may hurt us? Worth investigating, or it could have no effect on the receiver operating characteristic (ROC) curve.