In [1]:
import os, glob, bcolz, gc

import numpy as np
import pandas as pd

from tqdm import tqdm
from scipy import ndimage, misc
from scipy.stats import rankdata

from sklearn.externals import joblib
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import MinMaxScaler

from keras import backend as K
from keras.models import Model, load_model
from keras.applications.inception_v3 import preprocess_input as preprocess_input_incep_xcep

import matplotlib.image as mpimg
import matplotlib.pyplot as plt

Using TensorFlow backend.


In [2]:
def read_img(img_path, img_shape):
    """Load a single image file and resize it.

    Args:
        img_path: path of the image file, handed to ``misc.imread``.
        img_shape: target size handed to ``misc.imresize``; the caller in
            this notebook passes a ``(height, width)`` tuple.

    Returns:
        The resized image as returned by ``misc.imresize``.
    """
    # NOTE(review): imread/imresize are legacy scipy.misc helpers and are
    # absent from newer SciPy releases -- confirm the pinned SciPy version.
    return misc.imresize(misc.imread(img_path), img_shape)

def read_imgs(img_height, img_width):
    """Read every train and test image, resized to one common shape.

    File names are taken from the ``name`` column of the module-level
    ``train_set`` and ``test_set`` frames and resolved to
    ``<path>/train/<name>.jpg`` and ``<path>/test/<name>.jpg``. Those
    globals are looked up when the function is called, not when it is defined.

    Args:
        img_height: first element of the target shape given to ``read_img``.
        img_width: second element of the target shape given to ``read_img``.

    Returns:
        ``(train_images, test_images)``, each the result of ``np.array`` over
        the list of loaded images, in the same order as the ``name`` columns.
    """
    target_shape = (img_height, img_width)

    def _load_split(names, subdir):
        # One progress bar per split; images are read strictly in column order.
        return [
            read_img(os.path.join(path, subdir, str(name) + '.jpg'), target_shape)
            for name in tqdm(names)
        ]

    train_img = _load_split(train_set['name'].iloc[:], 'train')
    test_img = _load_split(test_set['name'].iloc[:], 'test')
    return np.array(train_img), np.array(test_img)

In [3]:
# Data root, plus the filename prefix of each saved-model family.
# NOTE(review): absolute, user-specific path -- must be edited to run elsewhere.
path = '/scratch/yns207/data_invasive/'
model200x300 = 'invasive_customincep200x300_aug8'
model300x400 = 'invasive_customincep2_aug8'
model400x500 = 'invasive_customincep400x500_aug8'

# Gather every file whose name starts with one of the prefixes (families kept
# in the order listed), leaving out any path that contains '_base'.
models = [
    found
    for prefix in (model200x300, model300x400, model400x500)
    for found in glob.glob(os.path.join(path, prefix + '*'))
    if '_base' not in found
]
models

['/scratch/yns207/data_invasive/invasive_customincep200x300_aug8_1.model',
 '/scratch/yns207/data_invasive/invasive_customincep200x300_aug8_0.model',
 '/scratch/yns207/data_invasive/invasive_customincep200x300_aug8_3.model',
 '/scratch/yns207/data_invasive/invasive_customincep200x300_aug8_4.model',
 '/scratch/yns207/data_invasive/invasive_customincep200x300_aug8_2.model',
 '/scratch/yns207/data_invasive/invasive_customincep2_aug8_2.model',
 '/scratch/yns207/data_invasive/invasive_customincep2_aug8_4.model',
 '/scratch/yns207/data_invasive/invasive_customincep2_aug8_0.model',
 '/scratch/yns207/data_invasive/invasive_customincep2_aug8_3.model',
 '/scratch/yns207/data_invasive/invasive_customincep2_aug8_1.model',
 '/scratch/yns207/data_invasive/invasive_customincep400x500_aug8_2.model',
 '/scratch/yns207/data_invasive/invasive_customincep400x500_aug8_3.model',
 '/scratch/yns207/data_invasive/invasive_customincep400x500_aug8_0.model',
 '/scratch/yns207/data_invasive/invasive_customincep400x500_aug8_1.model',
 '/scratch/yns207/data_invasive/invasive_customincep400x500_aug8_4.model']

In [6]:
# Training labels and the sample submission; the latter supplies the test image names.
labels_csv = os.path.join(path, 'train_labels.csv')
sample_csv = os.path.join(path, 'sample_submission.csv')
train_set = pd.read_csv(labels_csv)
test_set = pd.read_csv(sample_csv)
train_labels = train_set['invasive'].values

# Prediction table: starts with just the test names; later cells add one column per model.
preds_df = test_set[['name']].copy()

In [7]:
# Read both image sets at 200x300, then run the test images (as float32)
# through the preprocessing function imported from keras' inception_v3 module.
train_img, test_img = read_imgs(img_height=200, img_width=300)
test_img_preprocess = preprocess_input_incep_xcep(test_img.astype('float32'))

100%|██████████| 2295/2295 [01:17<00:00, 28.45it/s]
100%|██████████| 1531/1531 [00:49<00:00, 30.79it/s]


In [17]:
# Predict on the test images with every saved model whose path contains
# '200x300'; each model's flattened output becomes a column keyed by its path.
for model_name in filter(lambda candidate: '200x300' in candidate, models):
    model = load_model(model_name)
    preds = model.predict(test_img_preprocess)
    preds_df[model_name] = pd.Series(preds.flatten())
preds_df.head()

Unnamed: 0,name,b,/scratch/yns207/data_invasive/invasive_customincep200x300_aug8_1.model,/scratch/yns207/data_invasive/invasive_customincep200x300_aug8_0.model,/scratch/yns207/data_invasive/invasive_customincep200x300_aug8_3.model,/scratch/yns207/data_invasive/invasive_customincep200x300_aug8_4.model,/scratch/yns207/data_invasive/invasive_customincep200x300_aug8_2.model
0,1,1,0.887435,0.847788,0.865286,0.873137,0.875292
1,2,2,0.027051,0.066533,0.044588,0.015612,0.044626
2,3,3,0.072524,0.124137,0.077262,0.049406,0.077357
3,4,4,0.104,0.077666,0.050975,0.014422,0.048011
4,5,5,0.513139,0.292332,0.582133,0.335529,0.665967


In [20]:
# Read both image sets again at 300x400 and preprocess the test images
# (as float32) for the next group of models.
train_img, test_img = read_imgs(img_height=300, img_width=400)
test_img_preprocess = preprocess_input_incep_xcep(test_img.astype('float32'))

100%|██████████| 2295/2295 [01:20<00:00, 27.97it/s]
100%|██████████| 1531/1531 [00:51<00:00, 29.45it/s]


In [22]:
# 300x400 models: these are the paths containing 'customincep2_aug8'.
# Each model's flattened test predictions become a column keyed by its path.
for model_name in filter(lambda candidate: 'customincep2_aug8' in candidate, models):
    model = load_model(model_name)
    preds = model.predict(test_img_preprocess)
    preds_df[model_name] = pd.Series(preds.flatten())
preds_df.head()

Unnamed: 0,name,/scratch/yns207/data_invasive/invasive_customincep200x300_aug8_1.model,/scratch/yns207/data_invasive/invasive_customincep200x300_aug8_0.model,/scratch/yns207/data_invasive/invasive_customincep200x300_aug8_3.model,/scratch/yns207/data_invasive/invasive_customincep200x300_aug8_4.model,/scratch/yns207/data_invasive/invasive_customincep200x300_aug8_2.model,/scratch/yns207/data_invasive/invasive_customincep2_aug8_2.model,/scratch/yns207/data_invasive/invasive_customincep2_aug8_4.model,/scratch/yns207/data_invasive/invasive_customincep2_aug8_0.model,/scratch/yns207/data_invasive/invasive_customincep2_aug8_3.model,/scratch/yns207/data_invasive/invasive_customincep2_aug8_1.model
0,1,0.887435,0.847788,0.865286,0.873137,0.875292,0.865032,0.866172,0.87939,0.853244,0.864301
1,2,0.027051,0.066533,0.044588,0.015612,0.044626,0.062713,0.134055,0.055517,0.076696,0.071791
2,3,0.072524,0.124137,0.077262,0.049406,0.077357,0.116012,0.217204,0.257748,0.205751,0.267064
3,4,0.104,0.077666,0.050975,0.014422,0.048011,0.074664,0.259037,0.109604,0.112424,0.058295
4,5,0.513139,0.292332,0.582133,0.335529,0.665967,0.804904,0.586179,0.844371,0.747197,0.71704


In [23]:
# Read both image sets again at 400x500 and preprocess the test images
# (as float32) for the last group of models.
train_img, test_img = read_imgs(img_height=400, img_width=500)
test_img_preprocess = preprocess_input_incep_xcep(test_img.astype('float32'))

100%|██████████| 2295/2295 [01:24<00:00, 27.75it/s]
100%|██████████| 1531/1531 [00:54<00:00, 28.14it/s]


In [24]:
# 400x500 models: every path containing '400x500'. Each model's flattened
# test predictions become a column keyed by its path.
for model_name in filter(lambda candidate: '400x500' in candidate, models):
    model = load_model(model_name)
    preds = model.predict(test_img_preprocess)
    preds_df[model_name] = pd.Series(preds.flatten())
preds_df.head()

Unnamed: 0,name,/scratch/yns207/data_invasive/invasive_customincep200x300_aug8_1.model,/scratch/yns207/data_invasive/invasive_customincep200x300_aug8_0.model,/scratch/yns207/data_invasive/invasive_customincep200x300_aug8_3.model,/scratch/yns207/data_invasive/invasive_customincep200x300_aug8_4.model,/scratch/yns207/data_invasive/invasive_customincep200x300_aug8_2.model,/scratch/yns207/data_invasive/invasive_customincep2_aug8_2.model,/scratch/yns207/data_invasive/invasive_customincep2_aug8_4.model,/scratch/yns207/data_invasive/invasive_customincep2_aug8_0.model,/scratch/yns207/data_invasive/invasive_customincep2_aug8_3.model,/scratch/yns207/data_invasive/invasive_customincep2_aug8_1.model,/scratch/yns207/data_invasive/invasive_customincep400x500_aug8_2.model,/scratch/yns207/data_invasive/invasive_customincep400x500_aug8_3.model,/scratch/yns207/data_invasive/invasive_customincep400x500_aug8_0.model,/scratch/yns207/data_invasive/invasive_customincep400x500_aug8_1.model,/scratch/yns207/data_invasive/invasive_customincep400x500_aug8_4.model
0,1,0.887435,0.847788,0.865286,0.873137,0.875292,0.865032,0.866172,0.87939,0.853244,0.864301,0.893235,0.913099,0.917249,0.897482,0.878274
1,2,0.027051,0.066533,0.044588,0.015612,0.044626,0.062713,0.134055,0.055517,0.076696,0.071791,0.041026,0.058708,0.035605,0.006605,0.048365
2,3,0.072524,0.124137,0.077262,0.049406,0.077357,0.116012,0.217204,0.257748,0.205751,0.267064,0.107761,0.107616,0.06745,0.021934,0.189524
3,4,0.104,0.077666,0.050975,0.014422,0.048011,0.074664,0.259037,0.109604,0.112424,0.058295,0.082608,0.183846,0.090296,0.061905,0.106246
4,5,0.513139,0.292332,0.582133,0.335529,0.665967,0.804904,0.586179,0.844371,0.747197,0.71704,0.521066,0.762035,0.413177,0.904683,0.543214


In [25]:
# Shape check: (rows, columns) of the accumulated prediction table --
# the 'name' column plus one column per model scored so far.
preds_df.shape

(1531, 16)

In [30]:
# Min-max rescale each model's prediction column, independently, to the
# range 0.01-0.99; the 'name' column is left untouched.
scaler = MinMaxScaler(feature_range=(0.01, 0.99))
for column in preds_df.columns:
    if column != 'name':
        # fit_transform re-fits on every call, so each column gets its own min/max.
        preds_df[column] = scaler.fit_transform(preds_df[column].values.reshape(-1, 1))
preds_df.head()

Unnamed: 0,name,/scratch/yns207/data_invasive/invasive_customincep200x300_aug8_1.model,/scratch/yns207/data_invasive/invasive_customincep200x300_aug8_0.model,/scratch/yns207/data_invasive/invasive_customincep200x300_aug8_3.model,/scratch/yns207/data_invasive/invasive_customincep200x300_aug8_4.model,/scratch/yns207/data_invasive/invasive_customincep200x300_aug8_2.model,/scratch/yns207/data_invasive/invasive_customincep2_aug8_2.model,/scratch/yns207/data_invasive/invasive_customincep2_aug8_4.model,/scratch/yns207/data_invasive/invasive_customincep2_aug8_0.model,/scratch/yns207/data_invasive/invasive_customincep2_aug8_3.model,/scratch/yns207/data_invasive/invasive_customincep2_aug8_1.model,/scratch/yns207/data_invasive/invasive_customincep400x500_aug8_2.model,/scratch/yns207/data_invasive/invasive_customincep400x500_aug8_3.model,/scratch/yns207/data_invasive/invasive_customincep400x500_aug8_0.model,/scratch/yns207/data_invasive/invasive_customincep400x500_aug8_1.model,/scratch/yns207/data_invasive/invasive_customincep400x500_aug8_4.model
0,1,0.968996,0.94814,0.971484,0.95792,0.962896,0.954507,0.943616,0.965969,0.931456,0.943969,0.959507,0.979964,0.975368,0.960239,0.955119
1,2,0.035471,0.069459,0.041455,0.026079,0.047764,0.064123,0.144876,0.061952,0.089203,0.083709,0.050926,0.069786,0.044468,0.015997,0.053416
2,3,0.08481,0.134247,0.078481,0.062802,0.083824,0.123272,0.235592,0.283855,0.229177,0.295675,0.122076,0.121888,0.078092,0.032245,0.206786
3,4,0.118962,0.081981,0.048693,0.024786,0.051494,0.077385,0.281232,0.1213,0.127954,0.069059,0.095258,0.203095,0.102215,0.07461,0.116304
4,5,0.562881,0.323416,0.650611,0.373722,0.732286,0.887779,0.638144,0.927544,0.816436,0.784119,0.56272,0.819036,0.443134,0.967872,0.591073


In [33]:
# Ensemble: the submitted score is the row-wise mean over every model column.
# Column labels are compared by value (`!=`). The previous `not col is 'name'`
# tested object identity, which only excludes the ID column when the label is
# the very same string object as the literal; an equal-but-distinct label
# would have been averaged into the score.
subm = pd.DataFrame([], columns=['name', 'invasive'])
subm['name'] = test_set['name']
model_columns = [col for col in preds_df.columns if col != 'name']
subm['invasive'] = preds_df[model_columns].mean(axis=1)
subm.head()

Unnamed: 0,name,invasive
0,1,0.95861
1,2,0.059912
2,3,0.144855
3,4,0.106289
4,5,0.672051


In [34]:
# Write the averaged predictions as a gzip-compressed CSV, without the index column.
submission_path = os.path.join(path, 'results', 'subm_aug9_0.gz')
subm.to_csv(submission_path, index=False, compression='gzip')