In [1]:
from keras.preprocessing.image import ImageDataGenerator
import numpy as np
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras import applications
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
from keras import optimizers

Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
# Run configuration shared by the cells below.
img_width = 150
img_height = 150
epochs = 50        # upper bound on training epochs passed to model.fit
batch_size = 16    # mini-batch size passed to model.fit
split = 0.9        # passed to train_test_split as train_size

In [3]:
# Load the raw training images and their labels.
X_train = np.load('X_train.npy')
target_train = np.load('target_train.npy')

# Split into training and validation subsets.
# - The labels are truncated to the number of images instead of a hard-coded
#   count, so the cell keeps working if the dataset size changes.
# - random_state pins the shuffle so that the split (and every metric derived
#   from it) is reproducible across kernel restarts.
X_train_cv, X_valid, y_train_cv, y_valid = train_test_split(
    X_train,
    target_train[:len(X_train)],
    train_size=split,
    random_state=42,
)
nb_train_samples = len(X_train_cv)
nb_validation_samples = len(X_valid)

# Raw test images; read later by save_bottlebeck_features().
X_test = np.load('X_test.npy')

In [4]:
def save_bottlebeck_features():
    """Run the train/validation/test arrays through VGG16 and save the outputs.

    Reads the module-level arrays ``X_train_cv``, ``X_valid`` and ``X_test``
    and writes one ``.npy`` file of VGG16 predictions for each of them.
    """
    # VGG16 loaded with ImageNet weights and without its top layers.
    vgg_base = applications.VGG16(include_top=False, weights='imagenet')

    def _predict_and_save(images, out_path):
        # Forward pass through the network, then persist the result.
        np.save(out_path, vgg_base.predict(images))

    _predict_and_save(X_train_cv, 'bottleneck_features_train.npy')
    _predict_and_save(X_valid, 'bottleneck_features_validation.npy')
    _predict_and_save(X_test, 'bottleneck_features_test.npy')

In [5]:
def get_callbacks(filepath, patience=2):
    """Build the callbacks passed to ``model.fit``.

    Args:
        filepath: Path handed to ``ModelCheckpoint``.
        patience: Forwarded to ``EarlyStopping`` as its ``patience`` argument.

    Returns:
        A list ``[early_stopping, checkpoint]``, in that order.
    """
    # Stop when the monitored validation loss stops decreasing.
    early_stopping = EarlyStopping(monitor='val_loss', mode='min', patience=patience)
    # save_best_only=True: only overwrite the file when the model improves.
    checkpoint = ModelCheckpoint(filepath, save_best_only=True)
    return [early_stopping, checkpoint]


In [9]:
def train_top_model():
    """Build and compile the dense classifier that consumes bottleneck features.

    The input shape is read from ``bottleneck_features_train.npy`` (all
    dimensions after the first). Despite the name, no fitting happens here:
    the compiled model is returned for the caller to train.

    Returns:
        The compiled ``Sequential`` model, after printing its summary.
    """
    feature_shape = np.load('bottleneck_features_train.npy').shape[1:]

    # Flatten -> 256-unit ReLU layer -> dropout -> single sigmoid output.
    top_model = Sequential([
        Flatten(input_shape=feature_shape),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ])

    sgd = optimizers.SGD(lr=1e-4, momentum=0.9)
    top_model.compile(optimizer=sgd,
                      loss='binary_crossentropy',
                      metrics=['accuracy'])
    top_model.summary()
    return top_model
    
    

In [10]:
# File written by the ModelCheckpoint callback and read back by load_weights.
file_path = 'top_model_weights_path.h5'
# Early stopping with a patience of 5 epochs, plus checkpointing to file_path.
callbacks = get_callbacks(file_path, patience=5)


In [11]:
import os

# Write the VGG16 outputs to disk first: train_top_model() reads the training
# file to determine its input shape.
save_bottlebeck_features()
model = train_top_model()

# Reload the saved features that the classifier is trained and validated on.
train_data = np.load('bottleneck_features_train.npy')
validation_data = np.load('bottleneck_features_validation.npy')

# Fit the classifier; `callbacks` supplies early stopping and checkpointing.
history = model.fit(
    x=train_data,
    y=y_train_cv,
    validation_data=(validation_data, y_valid),
    batch_size=batch_size,
    epochs=epochs,
    callbacks=callbacks,
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 2048)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               524544    
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 257       
Total params: 524,801
Trainable params: 524,801
Non-trainable params: 0
_________________________________________________________________
Train on 1443 samples, validate on 161 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Ep

In [12]:
# Restore the checkpoint written by ModelCheckpoint before scoring.
model.load_weights(filepath=file_path)

# These metrics are computed on the validation split, not on a held-out test
# set, so they are labelled accordingly. The same split was passed to
# model.fit as validation_data (and so drove early stopping and checkpoint
# selection), which means the figures are likely optimistic.
score = model.evaluate(validation_data, y_valid, verbose=1)
print('Validation loss:', score[0])
print('Validation accuracy:', score[1])

Test loss: 0.399125587982
Test accuracy: 0.788819875776


In [None]:
# Load the test-set bottleneck features under their own name. Rebinding
# X_test here would replace the raw test images that
# save_bottlebeck_features() reads from that global, so re-running that
# function afterwards would feed it VGG16 outputs instead of images.
bottleneck_features_test = np.load('bottleneck_features_test.npy')
predicted_test = model.predict(bottleneck_features_test, verbose=1)

In [None]:
import pandas as pd

# One row per test sample: its id next to the model's prediction, flattened
# to a 1-D column.
test_id = np.load('test_id.npy')
submission = pd.DataFrame({
    'id': test_id,
    'is_iceberg': predicted_test.reshape(len(predicted_test)),
})
submission.to_csv('sub.csv', index=False)