In [7]:
import gc
import tqdm
import keras
import random
import numpy as np
import pandas as pd
from PIL import Image as im
from keras import layers as L
from keras import backend as K
from keras import activations as A
from keras import losses
from keras.models import Model
from matplotlib import pyplot as plt

In [30]:
most_common_reso = (1050, 700)
input_shape = (700, 1050, 3)

In [31]:
train_csv = pd.read_csv("datasets/train.csv")
test_csv = pd.read_csv("datasets/sample_submission.csv")

In [28]:
dataset = np.memmap("datasets/train.npy", dtype=np.uint8, mode="w+", shape=(train_csv.shape[0], 700, 1050, 3))

In [29]:
for i in tqdm.tqdm_notebook(range(train_csv.shape[0])):
    image = train_csv.iloc[i, 0]
    img = im.open("datasets/train/%s"%image)
    if img.size != most_common_reso:
        img = img.resize(most_common_reso)
    array = np.asarray(img)
    img.close()
    if len(array.shape) == 2:
        array = np.expand_dims(array, 2)
        array = np.concatenate([array]*3, axis=-1)
    dataset[i] = array
    gc.collect()

HBox(children=(IntProgress(value=0, max=25361), HTML(value='')))




In [32]:
indices = {id_:list() for id_ in train_csv.Id.unique()}

In [33]:
for i in range(train_csv.shape[0]):
    indices[train_csv.iloc[i,1]].append(i)

In [34]:
ids = list(indices)
ids_no_new = ids[:]
ids_no_new.remove("new_whale")

In [35]:
def generator(batch_size=32):
    batch_features_1, batch_features_2 = np.zeros((2*batch_size, 700, 1050, 3)), np.zeros((2*batch_size, 700, 1050, 3), dtype=np.float32)
    batch_labels = np.zeros((2*batch_size, 1))
    while True:
        for i in range(batch_size):
            while True:
                id_ = random.choice(ids_no_new)
                if len(indices[id_])>=2:
                    break
            idx_1, idx_2 = random.sample(indices[id_], 2)
            batch_features_1[i] = dataset[idx_1]/255.
            batch_features_2[i] = dataset[idx_2]/255.
            batch_labels[i, 0] = 0
        for i in range(batch_size, 2*batch_size):
            id_1, id_2 = random.sample(ids, 2)
            idx_1, idx_2 = random.choice(indices[id_1]), random.choice(indices[id_2])
            batch_features_1[i] = dataset[idx_1]/255.
            batch_features_2[i] = dataset[idx_2]/255.
            batch_labels[i, 0] = 1
        
        yield [batch_features_1, batch_features_2], batch_labels

In [None]:
def create_base_network(input_shape):
    input_ = L.Input(shape=input_shape)
    x = L.Conv2D(filters=8, kernel_size=3, strides=1, padding="same", activation=None)(input_)
    x = L.Activation(activation=A.relu)(x)
    x = L.Conv2D(filters=8, kernel_size=3, strides=1, padding="same", activation=None)(x)
    x = L.Activation(activation=A.relu)(x)
    x = L.MaxPool2D(pool_size=2, strides=2)(x)
    
    x = L.Conv2D(filters=16, kernel_size=3, strides=1, padding="same", activation=None)(x)
    x = L.Activation(activation=A.relu)(x)
    x = L.Conv2D(filters=16, kernel_size=3, strides=1, padding="same", activation=None)(x)
    x = L.Activation(activation=A.relu)(x)
    x = L.MaxPool2D(pool_size=2, strides=2)(x)
    
    x = L.Conv2D(filters=32, kernel_size=3, strides=1, padding="same", activation=None)(x)
    x = L.Activation(activation=A.relu)(x)
    x = L.Conv2D(filters=32, kernel_size=3, strides=1, padding="same", activation=None)(x)
    x = L.Activation(activation=A.relu)(x)
    x = L.MaxPool2D(pool_size=2, strides=2)(x)
    
    x = L.Conv2D(filters=32, kernel_size=3, strides=1, padding="same", activation=None)(x)
    x = L.Activation(activation=A.relu)(x)
    x = L.Conv2D(filters=32, kernel_size=3, strides=1, padding="same", activation=None)(x)
    x = L.Activation(activation=A.relu)(x)
    x = L.Conv2D(filters=32, kernel_size=1, strides=1, padding="same", activation=None)(x)
    x = L.Activation(activation=A.relu)(x)
    x = L.MaxPool2D(pool_size=2, strides=2)(x)
    
    x = L.Conv2D(filters=16, kernel_size=3, strides=1, padding="same", activation=None)(x)
    x = L.Activation(activation=A.relu)(x)
    x = L.Conv2D(filters=16, kernel_size=3, strides=1, padding="same", activation=None)(x)
    x = L.Activation(activation=A.relu)(x)
    x = L.Conv2D(filters=16, kernel_size=1, strides=1, padding="same", activation=None)(x)
    x = L.Activation(activation=A.relu)(x)
    x = L.MaxPool2D(pool_size=2, strides=2)(x)
    
    x = L.Flatten()(x)
    x = L.Dense(2048, activation=A.relu)(x)
    x = L.Dense(1024, activation=A.relu)(x)
    x = L.Dense(512, activation=A.relu)(x)
    x = L.Dense(256, activation=A.relu)(x)
    x = L.Dense(128, activation=A.relu)(x)
    #x = L.Dense(64, activation=A.relu)(x)
    return Model(input_, x)

def euclidean_distance(vects):
    x, y = vects
    sum_square = K.sum(K.square(x - y), axis=1, keepdims=True)
    return K.sqrt(K.maximum(sum_square, K.epsilon()))

def eucl_dist_output_shape(shapes):
    shape1, shape2 = shapes
    return (shape1[0], 1)

def contrastive_loss(y_true, y_pred):
    '''Contrastive loss from Hadsell-et-al.'06
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    '''
    margin = 1
    sqaure_pred = K.square(y_pred)
    margin_square = K.square(K.maximum(margin - y_pred, 0))
    return K.mean(y_true * sqaure_pred + (1 - y_true) * margin_square)

def accuracy(y_true, y_pred):
    '''Compute classification accuracy with a fixed threshold on distances.
    '''
    return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype)))

In [67]:
input_a = L.Input(shape=input_shape)
input_b = L.Input(shape=input_shape)
base_network = create_base_network(input_shape)
processed_a = base_network(input_a)
processed_b = base_network(input_b)

In [68]:
distance = L.Lambda(euclidean_distance,output_shape=eucl_dist_output_shape)([processed_a, processed_b])
model = Model([input_a, input_b], distance)

In [72]:
opt = keras.optimizers.Adam(1e-3)
model.compile(loss=contrastive_loss, optimizer=opt, metrics=[accuracy])

In [76]:
history = model.fit_generator(generator=generator(batch_size=32), steps_per_epoch=32, epochs=1000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
Epoch 73/1000
E

Epoch 83/1000
Epoch 84/1000
Epoch 85/1000
Epoch 86/1000
Epoch 87/1000
Epoch 88/1000
Epoch 89/1000
Epoch 90/1000
Epoch 91/1000
Epoch 92/1000
Epoch 93/1000
Epoch 94/1000
Epoch 95/1000
Epoch 96/1000
Epoch 97/1000
Epoch 98/1000
Epoch 99/1000
Epoch 100/1000
Epoch 101/1000
Epoch 102/1000
Epoch 103/1000
Epoch 104/1000
Epoch 105/1000
Epoch 106/1000
Epoch 107/1000
Epoch 108/1000
Epoch 109/1000
Epoch 110/1000
Epoch 111/1000
Epoch 112/1000
Epoch 113/1000

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 244/1000
Epoch 245/1000
Epoch 246/1000
Epoch 247/1000
Epoch 248/1000
Epoch 249/1000
Epoch 250/1000
Epoch 251/1000
Epoch 252/1000
Epoch 253/1000
Epoch 254/1000
Epoch 255/1000
Epoch 256/1000
Epoch 257/1000
Epoch 258/1000
Epoch 259/1000
Epoch 260/1000
Epoch 261/1000
Epoch 262/1000
Epoch 263/1000
Epoch 264/1000
Epoch 265/1000
Epoch 266/1000
Epoch 267/1000
Epoch 268/1000
Epoch 269/1000
Epoch 270/1000
Epoch 271/1000
Epoch 272/1000
Epoch 273/1000
Epoch 274/1000
Epoch 275/1000
Epoch 276/1000
Epoch 277/1000
Epoch 278/1000
Epoch 279/1000
Epoch 280/1000
Epoch 281/1000
Epoch 282/1000
Epoch 283/1000
Epoch 284/1000
Epoch 285/1000
Epoch 286/1000
Epoch 287/1000
Epoch 288/1000
Epoch 289/1000
Epoch 290/1000
Epoch 291/1000
Epoch 292/1000
Epoch 293/1000
Epoch 294/1000
Epoch 295/1000
Epoch 296/1000
Epoch 297/1000
Epoch 298/1000
Epoch 299/1000
Epoch 300/1000
Epoch 301/1000
Epoch 302/1000
Epoch 303/1000
Epoch 304/1000
Epoch 305/1000
Epoch 306/1000
Epoch 307/1000
Epoch 308/1000
Epoch 309/1000
Epoch 310/

Epoch 323/1000
Epoch 324/1000
Epoch 325/1000
Epoch 326/1000
Epoch 327/1000
Epoch 328/1000
Epoch 329/1000
Epoch 330/1000
Epoch 331/1000
Epoch 332/1000
Epoch 333/1000
Epoch 334/1000
Epoch 335/1000
Epoch 336/1000
Epoch 337/1000
Epoch 338/1000
Epoch 339/1000
Epoch 340/1000
Epoch 341/1000
Epoch 342/1000
Epoch 343/1000
Epoch 344/1000
Epoch 345/1000
Epoch 346/1000
Epoch 347/1000
Epoch 348/1000
Epoch 349/1000
Epoch 350/1000
Epoch 351/1000
Epoch 352/1000
Epoch 353/1000
Epoch 354/1000
Epoch 355/1000
Epoch 356/1000
Epoch 357/1000
Epoch 358/1000
Epoch 359/1000
Epoch 360/1000
Epoch 361/1000
Epoch 362/1000
Epoch 363/1000
Epoch 364/1000
Epoch 365/1000
Epoch 366/1000
Epoch 367/1000
Epoch 368/1000
Epoch 369/1000
Epoch 370/1000
Epoch 371/1000
Epoch 372/1000
Epoch 373/1000
Epoch 374/1000
Epoch 375/1000
Epoch 376/1000
Epoch 377/1000
Epoch 378/1000
Epoch 379/1000
Epoch 380/1000
Epoch 381/1000
Epoch 382/1000
Epoch 383/1000
Epoch 384/1000
Epoch 385/1000
Epoch 386/1000
Epoch 387/1000
Epoch 388/1000
Epoch 389/

Epoch 402/1000
Epoch 403/1000
Epoch 404/1000
Epoch 405/1000
Epoch 406/1000
Epoch 407/1000
Epoch 408/1000
Epoch 409/1000
Epoch 410/1000
Epoch 411/1000
Epoch 412/1000
Epoch 413/1000
Epoch 414/1000
Epoch 415/1000
Epoch 416/1000
Epoch 417/1000
Epoch 418/1000
Epoch 419/1000
Epoch 420/1000
Epoch 421/1000
Epoch 422/1000
Epoch 423/1000
Epoch 424/1000
Epoch 425/1000
Epoch 426/1000
Epoch 427/1000
Epoch 428/1000
Epoch 429/1000
Epoch 430/1000
Epoch 431/1000
Epoch 432/1000
Epoch 433/1000
Epoch 434/1000
Epoch 435/1000

KeyboardInterrupt: 

In [86]:
tr_v, tr_l = next(g)

In [87]:
preds = model.predict(tr_v)

In [88]:
co

0.27734375

In [93]:
np.round(preds)

array([[1.],
       [1.],
       [1.],
       [0.],
       [1.],
       [1.],
       [1.],
       [1.],
       [0.],
       [0.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [1.],
       [1.],
       [1.],
       [0.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [0.],
       [1.],
       [0.],
       [1.],

In [94]:
tr_l

array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],