In [42]:
import pandas as pd
import numpy as np
import cv2
import sys
import importlib
SEED = 1234
np.random.seed(SEED) 

from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation, GlobalAveragePooling2D, AveragePooling2D, Concatenate, Input
from keras.layers import Conv2D, MaxPooling2D, Add
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
from keras.utils import plot_model
from keras.applications.vgg16 import VGG16
from keras.models import Model

from sklearn.model_selection import StratifiedKFold, train_test_split
import matplotlib.pyplot as plt
from scipy.ndimage.filters import uniform_filter

%cd E:\kaggle\iceberg

E:\kaggle\iceberg


In [2]:
def display_img(band_1, band_2, is_iceberg, angle = None):
    if angle is None:
        title_str = 'Iceberg' if is_iceberg == 1 else 'Ship'
    else:
        title_str = 'Iceberg-' + str(angle) if is_iceberg == 1 else 'Ship-' + str(angle)
    fig = plt.figure(0, figsize=(10,10))
    ax = fig.add_subplot(1,2,1)
    ax.set_title(title_str + ' - Band 1')
    ax.imshow(band_1,cmap='jet')
    ax = fig.add_subplot(1,2,2)
    ax.set_title(title_str + ' - Band 2')
    ax.imshow(band_2,cmap='jet')
    plt.show()

# implement functions to convert SAR data from decibel units to linear units and back again
def decibel_to_linear(band):
     # convert to linear units
    return np.power(10,np.array(band)/10)

def linear_to_decibel(band):
    return 10*np.log10(band)

# implement the Lee Filter for a band in an image already reshaped into the proper dimensions
def lee_filter(band, window, var_noise = 0.25):
    # band: SAR data to be despeckled (already reshaped into image dimensions)
    # window: descpeckling filter window (tuple)
    # default noise variance = 0.25
    # assumes noise mean = 0
    
    mean_window = uniform_filter(band, window)
    mean_sqr_window = uniform_filter(band**2, window)
    var_window = mean_sqr_window - mean_window**2

    weights = var_window / (var_window + var_noise)
    band_filtered = mean_window + weights*(band - mean_window)
    return band_filtered

def apply_lee_filter(band_1_linear, band_2_linear, window_var_index = 0, noise_var_index = 0):
    windows = [2, 4, 8] # can be tuple too if not symetric
    noise_var = np.array([1, 2, 4])
    noise_var_1 = np.round(np.var(band_1_linear) * noise_var, 10)
    noise_var_2 = np.round(np.var(band_2_linear) * noise_var, 10)
    band_1_linear_filtered = lee_filter(band_1_linear, windows[window_var_index], noise_var_1[noise_var_index])
    band_2_linear_filtered = lee_filter(band_2_linear, windows[window_var_index], noise_var_2[noise_var_index])
    return band_1_linear_filtered, band_2_linear_filtered

def apply_lee_filter_single(band_linear, window_var_index = 0, noise_var_index = 0):
    windows = [2, 4, 8] # can be tuple too if not symetric
    noise_var = np.array([1, 2, 4])
    noise_var = np.round(np.var(band_linear) * noise_var, 10)
    band_linear_filtered = lee_filter(band_linear, windows[window_var_index], noise_var[noise_var_index])
    return band_linear_filtered

def np_get_scaled_band(band_list):
    imgs = []
    for band in band_list:        
        imgs.append((band - band.mean()) / band.std())
    return np.array(imgs)

In [3]:
train = pd.read_json("E:/kaggle/iceberg/train.json/data/processed/train.json")
Y_train=train['is_iceberg']
test = pd.read_json("E:/kaggle/iceberg/test.json/data/processed/test.json")

train['inc_angle']=pd.to_numeric(train['inc_angle'], errors='coerce')#We have only 133 NAs.
test['inc_angle']=pd.to_numeric(test['inc_angle'], errors='coerce')
train['inc_angle']=train['inc_angle'].fillna(method='pad')
test['inc_angle']=test['inc_angle'].fillna(method='pad')
X_angle=train['inc_angle']
X_test_angle=test['inc_angle']

def iso(arr):
    arr = np.reshape(arr, (75,75))
    p = arr > (np.mean(arr) + 2 * np.std(arr))
    return p * arr

def size(arr):     
    return float(np.sum(arr < -5)) / (75 * 75)

train['iso_1'] = train.band_1.apply(iso)
train['iso_2'] = train.band_2.apply(iso)
train['size_1'] = train.iso_1.apply(size)
train['size_2'] = train.iso_2.apply(size)
X_size_1 = np.array(train['size_1'])
X_size_2 = np.array(train['size_2'])

test['iso_1'] = test.band_1.apply(iso)
test['iso_2'] = test.band_2.apply(iso)
test['size_1'] = test.iso_1.apply(size)
test['size_2'] = test.iso_2.apply(size)
test_size_1 = np.array(test['size_1'])
test_size_2 = np.array(test['size_2'])

In [4]:
#Generate the training data
X_band_1=np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in train["band_1"]])
X_band_2=np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in train["band_2"]])
#apply filter
X_band_1_filtered = np.array([apply_lee_filter_single(decibel_to_linear(band)) for band in X_band_1])
X_band_2_filtered = np.array([apply_lee_filter_single(decibel_to_linear(band)) for band in X_band_2])
X_band_1_filtered = linear_to_decibel(X_band_1_filtered)
X_band_2_filtered = linear_to_decibel(X_band_2_filtered)
X_band_1 = X_band_1_filtered
X_band_2 = X_band_2_filtered
X_band_mean = (X_band_1 + X_band_2) / 2
# construct bands
X_band_3=np.fabs(np.subtract(X_band_1,X_band_2))
X_band_4=np.maximum(X_band_1,X_band_2)
X_band_5=np.minimum(X_band_1,X_band_2)
# subtract mean
X_band_3 = np_get_scaled_band(X_band_3)
X_band_4 = np_get_scaled_band(X_band_4)
X_band_5 = np_get_scaled_band(X_band_5)

# X_train = np.concatenate([X_band_3[:, :, :, np.newaxis],X_band_4[:, :, :, np.newaxis],X_band_5[:, :, :, np.newaxis]], axis=-1)
X_train = np.concatenate([X_band_1[:, :, :, np.newaxis],X_band_2[:, :, :, np.newaxis],X_band_mean[:, :, :, np.newaxis]], axis=-1)

X_band_test_1=np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in test["band_1"]])
X_band_test_2=np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in test["band_2"]])
#apply filter
X_band_test_1_filtered = np.array([apply_lee_filter_single(decibel_to_linear(band)) for band in X_band_test_1])
X_band_test_2_filtered = np.array([apply_lee_filter_single(decibel_to_linear(band)) for band in X_band_test_2])
X_band_test_1_filtered = linear_to_decibel(X_band_test_1_filtered)
X_band_test_2_filtered = linear_to_decibel(X_band_test_2_filtered)
X_band_test_1 = X_band_test_1_filtered
X_band_test_2 = X_band_test_2_filtered
X_band_test_mean = (X_band_test_1 + X_band_test_2) / 2
# construct bands
X_band_test_3=np.fabs(np.subtract(X_band_test_1,X_band_test_2))
X_band_test_4=np.maximum(X_band_test_1,X_band_test_2)
X_band_test_5=np.minimum(X_band_test_1,X_band_test_2)
# subtract mean
X_band_test_3 = np_get_scaled_band(X_band_test_3)
X_band_test_4 = np_get_scaled_band(X_band_test_4)
X_band_test_5 = np_get_scaled_band(X_band_test_5)

# X_test = np.concatenate([X_band_test_3[:, :, :, np.newaxis], X_band_test_4[:, :, :, np.newaxis],X_band_test_5[:, :, :, np.newaxis]],axis=-1)
X_test = np.concatenate([X_band_test_1[:, :, :, np.newaxis], X_band_test_2[:, :, :, np.newaxis],X_band_test_mean[:, :, :, np.newaxis]],axis=-1)

In [5]:
# resize_shape = tuple(np.array(X_train.shape[1:3]) * 2)
# X_train = np.array([cv2.resize(img, resize_shape) for img in X_train])
# X_test = np.array([cv2.resize(img, resize_shape) for img in X_test])

In [12]:
print(X_train.shape, Y_train.shape, X_angle.shape, X_size_1.shape, X_test.shape)

(1604, 75, 75, 3) (1604,) (1604,) (1604,) (8424, 75, 75, 3)


In [7]:
def getModel():
    img_input = Input(shape=X_train.shape[1:], name="images")
    angle_input = Input(shape=[1], name="angle")
    
    # ==================== flow1 ====================
    flow1_x = img_input
    # -------------------- block1 -------------------
    flow1_shortcut1 = flow1_x
    flow1_x = Conv2D(256, (3, 3), activation='relu', padding='same', name='flow1_block1_conv1')(flow1_x)
    flow1_x = Conv2D(256, (3, 3), activation='relu', padding='same', name='flow1_block1_conv2')(flow1_x)
    flow1_x = BatchNormalization()(flow1_x)
    flow1_x = MaxPooling2D((2, 2), strides=(2, 2), name='flow1_block1_pool')(flow1_x)
    # -------------------- shortcut1 -------------------
    flow1_shortcut1 = Conv2D(256, (3, 3), strides=(2, 2), activation='relu', name='flow1_shortcut1')(flow1_shortcut1)
    flow1_x = Add(name='flow1_shortcut1_add')([flow1_shortcut1, flow1_x])
    flow1_x = Dropout(0.2)(flow1_x)
    # -------------------- block2 -------------------
    flow1_shortcut2 = flow1_x
    flow1_x = Conv2D(256, (3, 3), activation='relu', padding='same', name='flow1_block2_conv1')(flow1_x)
    flow1_x = Conv2D(256, (3, 3), activation='relu', padding='same', name='flow1_block2_conv2')(flow1_x)
    flow1_x = BatchNormalization()(flow1_x)
    flow1_x = MaxPooling2D((2, 2), strides=(2, 2), name='flow1_block2_pool')(flow1_x)
    # -------------------- shortcut2 -------------------
    flow1_shortcut2 = Conv2D(256, (3, 3), strides=(2, 2), activation='relu', name='flow1_shortcut2')(flow1_shortcut2)
    flow1_x = Add(name='flow1_shortcut2_add')([flow1_shortcut2, flow1_x])
    flow1_x = Dropout(0.2)(flow1_x)
    # -------------------- block3 -------------------
    flow1_shortcut3 = flow1_x
    flow1_x = Conv2D(256, (3, 3), activation='relu', padding='same', name='flow1_block3_conv1')(flow1_x)
    flow1_x = Conv2D(256, (3, 3), activation='relu', padding='same', name='flow1_block3_conv2')(flow1_x)
    flow1_x = Conv2D(256, (3, 3), activation='relu', padding='same', name='flow1_block3_conv3')(flow1_x)
    flow1_x = BatchNormalization()(flow1_x)
    flow1_x = MaxPooling2D((2, 2), strides=(2, 2), name='flow1_block3_pool')(flow1_x)
    # -------------------- shortcut3 -------------------
    flow1_shortcut3 = Conv2D(256, (3, 3), strides=(2, 2), activation='relu', padding='same', name='flow1_shortcut3')(flow1_shortcut3)
    flow1_x = Add(name='flow1_shortcut3_add')([flow1_shortcut3, flow1_x])
    flow1_x = Dropout(0.2)(flow1_x)
    # -------------------- block4 -------------------
    flow1_shortcut4 = flow1_x
    flow1_x = Conv2D(512, (3, 3), activation='relu', padding='same', name='flow1_block4_conv1')(flow1_x)
    flow1_x = Conv2D(512, (3, 3), activation='relu', padding='same', name='flow1_block4_conv2')(flow1_x)
    flow1_x = Conv2D(512, (3, 3), activation='relu', padding='same', name='flow1_block4_conv3')(flow1_x)
    flow1_x = BatchNormalization()(flow1_x)
    flow1_x = MaxPooling2D((2, 2), strides=(2, 2), name='flow1_block4_pool')(flow1_x)
    # -------------------- shortcut4 -------------------
    flow1_shortcut4 = Conv2D(512, (3, 3), strides=(2, 2), activation='relu', name='flow1_shortcut4')(flow1_shortcut4)
    flow1_x = Add(name='flow1_shortcut4_add')([flow1_shortcut4, flow1_x])
    flow1_x = Dropout(0.2)(flow1_x)
    # -------------------- block5 -------------------
    flow1_x = Conv2D(512, (3, 3), activation='relu', padding='same', name='flow1_block5_conv1')(flow1_x)
    flow1_x = Conv2D(512, (3, 3), activation='relu', padding='same', name='flow1_block5_conv2')(flow1_x)
    flow1_x = BatchNormalization()(flow1_x)
    flow1_x = MaxPooling2D((2, 2), strides=(2, 2), name='flow1_block5_pool')(flow1_x)
    # -------------------- block6 -------------------
    flow1_x = GlobalAveragePooling2D(name='flow1_block6_global_avg')(flow1_x)
    flow1_x = Dropout(0.1)(flow1_x)
    
    predictions = Dense(1, activation='sigmoid', name='predictions')(flow1_x)
    model = Model(inputs=img_input, outputs=predictions)
    optimizer = Adam(lr=1e-4)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    
    return model

In [46]:
from keras.utils.data_utils import get_file

def getVgg19PlusModel():
    img_input = Input(shape=X_train.shape[1:], name="images")
    angle_input = Input(shape=[1], name="angle")
    size_1 = Input(shape=[1], name="size_1")
    size_2 = Input(shape=[1], name="size_2")
    
    # ==================== flow1 ====================
    flow1_x = img_input
    # -------------------- block1 -------------------
    flow1_x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(flow1_x)
    flow1_x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(flow1_x)
#     flow1_x = BatchNormalization()(flow1_x)
    flow1_x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(flow1_x)
    # -------------------- block2 -------------------
    flow1_x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(flow1_x)
    flow1_x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(flow1_x)
#     flow1_x = BatchNormalization()(flow1_x)
    flow1_x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(flow1_x)
    # -------------------- block3 -------------------
    flow1_x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(flow1_x)
    flow1_x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(flow1_x)
    flow1_x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(flow1_x)
    flow1_x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv4')(flow1_x)
#     flow1_x = BatchNormalization()(flow1_x)
    flow1_x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(flow1_x)
    # -------------------- block4 -------------------
    flow1_x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(flow1_x)
    flow1_x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(flow1_x)
    flow1_x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(flow1_x)
    flow1_x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv4')(flow1_x)
#     flow1_x = BatchNormalization()(flow1_x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(flow1_x)
    # -------------------- block5 -------------------
    flow1_x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(flow1_x)
    flow1_x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(flow1_x)
    flow1_x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(flow1_x)
    flow1_x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv4')(flow1_x)
#     flow1_x = BatchNormalization()(flow1_x)
    flow1_x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(flow1_x)
    # -------------------- block6 -------------------
    flow1_x = BatchNormalization()(flow1_x)
    flow1_x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block6_conv1')(flow1_x)
    flow1_x = BatchNormalization()(flow1_x)
    flow1_x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block6_conv2')(flow1_x)
    flow1_x = Dropout(0.1)(flow1_x)
    
    flow1_x = GlobalAveragePooling2D()(flow1_x)
    flow1_x = Dropout(0.1)(flow1_x)
    flow1_x = Concatenate()([flow1_x, size_1])
    predictions = Dense(1, activation='sigmoid', name='predictions')(flow1_x)
    model = Model(inputs=[img_input, size_1], outputs=predictions)
    weights_path = get_file('vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5', None, cache_subdir='models')
    model.load_weights(weights_path, by_name=True)
    optimizer = Adam(lr=1e-4)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    
    return model
getModel = getVgg19PlusModel

In [47]:
model = getModel()
model.summary()
plot_model(model, to_file="k_scale_net.png")

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
images (InputLayer)              (None, 75, 75, 3)     0                                            
____________________________________________________________________________________________________
block1_conv1 (Conv2D)            (None, 75, 75, 64)    1792        images[0][0]                     
____________________________________________________________________________________________________
block1_conv2 (Conv2D)            (None, 75, 75, 64)    36928       block1_conv1[0][0]               
____________________________________________________________________________________________________
block1_pool (MaxPooling2D)       (None, 37, 37, 64)    0           block1_conv2[0][0]               
___________________________________________________________________________________________

In [40]:
gen = ImageDataGenerator(horizontal_flip = True,
                         vertical_flip = True,
                         width_shift_range = 0.1,
                         height_shift_range = 0.1,
                         channel_shift_range=0,
                         zoom_range = 0.5,
                         rotation_range = 10)

def get_callbacks(filepath):
    es = EarlyStopping('val_loss', patience=20, mode="min")
    reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=7, verbose=1, epsilon=1e-4, mode='min')
    msave = ModelCheckpoint(filepath, save_best_only=True)
    return [es, msave, reduce_lr_loss]

def flow_x1_x2_y(X1, X2, Y, batch_size, seed):
    X1Y = gen.flow(X1, Y, batch_size=batch_size, seed=SEED)
    X1X2 = gen.flow(X1, X2, batch_size=batch_size, seed=SEED)
    while True:
        X1, Y = X1Y.next()
        _, X2 = X1X2.next()
        yield [X1, X2], Y

In [44]:
K=3
epochs = 150
batch_size = 32
def Train_StratifiedKFold():
    Kfolds = list(StratifiedKFold(n_splits=K, shuffle=True, random_state=SEED).split(X_train, Y_train))
    for j, (train_idx, test_idx) in enumerate(Kfolds):
        print('\n==========FOLD %s=========='% j)
        Xtrain_cv = X_train[train_idx]
        Ytrain_cv = Y_train[train_idx]
        Xangle_cv = X_angle[train_idx]
        Xsize1_cv = X_size_1[train_idx]

        Xtrain_val = X_train[test_idx]
        Ytrain_val = Y_train[test_idx]
        Xangle_val = X_angle[test_idx]
        Xsize1_val = X_size_1[test_idx]

        Xtrain_input = [X_train, X_size_1]
        Xval_input = [Xtrain_val, Xsize1_val]

        model_file = 'k_scale_net_%s.hdf5' % j
        model = getModel()

        steps = np.ceil(len(Xtrain_cv) / batch_size) * 3
        model.fit_generator(
            flow_x1_x2_y(Xtrain_cv, Xsize1_cv, Ytrain_cv, batch_size=batch_size, seed=SEED), 
            steps_per_epoch=steps, epochs=epochs, verbose=1, shuffle=True, 
            callbacks=get_callbacks(model_file), validation_data=(Xval_input, Ytrain_val))

        model.load_weights(filepath = model_file)    

        score = model.evaluate(Xtrain_input, Y_train, verbose=1)
        print('Train loss:', score[0])
        print('Train accuracy:', score[1])
        score = model.evaluate(Xval_input, Ytrain_val, verbose=1)
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])

def Train_KFold(j=0):
    Xtrain_cv, Xtrain_val, Ytrain_cv, Ytrain_val, Xangle_cv, Xangle_val, Xsize1_cv, Xsize1_val = train_test_split(X_train, Y_train, X_angle, X_size_1, test_size=0.3, shuffle = True, random_state=SEED)
    Xtrain_input = [X_train, X_size_1]
    Xval_input = [Xtrain_val, Xsize1_val]

    model_file = 'k_scale_net_%s.hdf5' % j
    model = getModel()

    steps = np.ceil(len(Xtrain_cv) / batch_size) * 3
    model.fit_generator(
        flow_x1_x2_y(Xtrain_cv, Xsize1_cv, Ytrain_cv, batch_size=batch_size, seed=SEED), 
        steps_per_epoch=steps, epochs=epochs, verbose=1, shuffle=True, 
        callbacks=get_callbacks(model_file), validation_data=(Xval_input, Ytrain_val))

    model.load_weights(filepath = model_file)    

    score = model.evaluate(Xtrain_input, Y_train, verbose=1)
    print('Train loss:', score[0])
    print('Train accuracy:', score[1])
    score = model.evaluate(Xval_input, Ytrain_val, verbose=1)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

In [48]:
Train_KFold()

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 00016: reducing learning rate to 9.999999747378752e-06.
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 00026: reducing learning rate to 9.999999747378752e-07.
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 00033: reducing learning rate to 9.999999974752428e-08.
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Train loss: 0.16821044546
Train accuracy: 0.936408977556
Test loss: 0.241464865158
Test accuracy: 0.904564315353


In [74]:
def predict_StratifiedKFold():
    test_randround = 3
    test_pred = 0
    for j in range(K):
        Xtest_input = [X_test, test_size_1]
        model_file = 'k_scale_net_%s.hdf5' % j
        model = getModel()
        model.load_weights(filepath = model_file)    
        for i in range(test_randround):
            test_steps = np.ceil(X_test.shape[0] / batch_size)
            test_pred += model.predict_generator(flow_x1_x2_y(*Xtest_input, range(X_test.shape[0]), batch_size=batch_size, seed=i), steps=test_steps, verbose=1).reshape(X_test.shape[0])
            print(test_pred.shape)
    test_pred /= K * test_randround

def predict_KFold(j=0, K=1, test_randround=3):
    test_pred = 0
    for _ in range(K):
        Xtest_input = [X_test, test_size_1]
        model_file = 'k_scale_net_%s.hdf5' % j
        model = getModel()
        model.load_weights(filepath = model_file)    
        for i in range(test_randround):
            test_steps = np.ceil(X_test.shape[0] / batch_size)
            test_pred += model.predict_generator(flow_x1_x2_y(*Xtest_input, range(X_test.shape[0]), batch_size=batch_size, seed=i), steps=test_steps, verbose=1).reshape(X_test.shape[0])
            print(test_pred.shape)
    test_pred /= K * test_randround
    return test_pred

In [75]:
test_pred = predict_KFold()



In [None]:
submission = pd.DataFrame({'id': df_test["id"], 'is_iceberg': test_pred})
print(submission.count(), Xtest.shape[0])

submission.to_csv('submission-k-scale-net.csv', index=False)