## Load model

In [1]:
import h5py
from keras.preprocessing.image import ImageDataGenerator

Using TensorFlow backend.


Indicate whether to follow the bottleneck approach (`bottleneck = True`) or the `fit_generator` approach (`bottleneck = False`).

In [2]:
# True  -> load the saved head model ("head_model.h5") and predict from the
#          features stored in "300_bottlenecks.h5".
# False -> rebuild the full ResNet50-based model, load "full_model.h5" and
#          predict straight from the test image generator.
bottleneck = True

## Build model:

In [3]:
# Obtain `model` according to the `bottleneck` flag.
if not bottleneck:
    # Full pipeline: ResNet50 body topped with BatchNorm -> global average
    # pooling -> 2-way softmax, with weights restored from "full_model.h5".
    from resnet50 import ResNet50
    from keras.models import Model
    from keras.layers import Input, Dense, GlobalAveragePooling2D, Dropout, BatchNormalization

    print("BUILDING BODY...")
    body = ResNet50(input_shape=(300,300,3))

    head = BatchNormalization(axis=3)(body.output)
    head = GlobalAveragePooling2D()(head)
    head = Dense(2, activation="softmax")(head)

    model = Model(body.input, head)

    print("LOADING WEIGHTS ...")
    model.load_weights("full_model.h5")
else:
    # Bottleneck pipeline: the saved model is loaded as-is from disk.
    from keras.models import load_model

    model = load_model("head_model.h5")

## Get test batches for filenames (and for prediction in the non-bottleneck approach)

In [4]:
# Plain (no augmentation) generator over the "test" directory.
# shuffle=False keeps the file order fixed, and class_mode=None means no
# labels are requested from the generator.
BATCH_SIZE = 8
gen = ImageDataGenerator()
test_batches = gen.flow_from_directory(
    "test",
    target_size=model.input_shape[1:3],
    batch_size=BATCH_SIZE,
    class_mode=None,
    shuffle=False,
)

Found 12500 images belonging to 1 classes.


## Generate predictions

In [5]:
# Compute `y_test`, the model output for every test sample.
if bottleneck:
    # Open the feature file explicitly read-only: older h5py releases default
    # to append mode, which opens the data read-write and would silently
    # create an empty file if the path were wrong.
    with h5py.File("300_bottlenecks.h5", "r") as hf:
        X_test = hf["test"][:]  # read the whole "test" dataset into memory
    y_test = model.predict(X_test)

else:
    # NOTE(review): `nb_sample` and passing a sample count as the second
    # argument look like the Keras 1 generator API - confirm against the
    # installed Keras version before upgrading.
    y_test = model.predict_generator(test_batches, test_batches.nb_sample)

## Load sample submission

In [6]:
import pandas as pd

In [7]:
# Load the submission template; later cells overwrite its "label" column for
# rows whose "id" matches a test image.
subm = pd.read_csv("sample_submission.csv")

## Extract ordered ids from ordered bottlenecks

In [8]:
# Numeric id of every test image, in generator order.
# Filenames look like "<subdir>\123.jpg" on Windows and "<subdir>/123.jpg"
# elsewhere, so separators are normalised and only the last path component
# is used; the text before the first "." is the id.
ids = [
    int(x.replace("\\", "/").rsplit("/", 1)[-1].split(".")[0])
    for x in test_batches.filenames
]

## Fill the sample submission

In [9]:
# Copy column 1 of `y_test` into the "label" of the submission row with the
# matching id. Done in one vectorised pass instead of one full-column scan
# per id; rows whose id has no prediction keep their existing label.
if len(ids) != len(y_test):
    # Misaligned ids/predictions would produce a silently wrong submission.
    raise ValueError(
        "got %d ids but %d predictions" % (len(ids), len(y_test))
    )

# float() yields plain Python floats so the column stays float64.
pred_by_id = {image_id: float(prob) for image_id, prob in zip(ids, y_test[:, 1])}
matched = subm["id"].isin(list(pred_by_id))
subm.loc[matched, "label"] = subm.loc[matched, "id"].map(pred_by_id)

## Write the new submission

In [10]:
# Save the filled-in submission; index=False keeps the DataFrame index out of
# the file.
subm.to_csv("submission4.csv", index=False)

In [11]:
# Preview the first rows as a quick sanity check of the written labels.
subm.head()

Unnamed: 0,id,label
0,1,0.999932
1,2,1.0
2,3,0.999964
3,4,0.998971
4,5,2.2e-05


## Try clipping

In [12]:
# Bound every prediction to the closed interval [0.02, 0.98].
clipped = y_test.clip(0.02, 0.98)

In [13]:
# Copy column 1 of `clipped` into the "label" of the submission row with the
# matching id. Done in one vectorised pass instead of one full-column scan
# per id; rows whose id has no prediction keep their existing label.
if len(ids) != len(clipped):
    # Misaligned ids/predictions would produce a silently wrong submission.
    raise ValueError(
        "got %d ids but %d predictions" % (len(ids), len(clipped))
    )

# float() yields plain Python floats so the column stays float64.
clipped_by_id = {image_id: float(prob) for image_id, prob in zip(ids, clipped[:, 1])}
matched = subm["id"].isin(list(clipped_by_id))
subm.loc[matched, "label"] = subm.loc[matched, "id"].map(clipped_by_id)

In [14]:
# Save the clipped submission to its own file; index=False keeps the
# DataFrame index out of the file.
subm.to_csv("submission4_clipped.csv", index=False)

In [15]:
# Preview the first rows to confirm the labels now sit within the clip bounds.
subm.head()

Unnamed: 0,id,label
0,1,0.98
1,2,0.98
2,3,0.98
3,4,0.98
4,5,0.02
