# Read me

To run on multi-GPU server, please:
[1]increase data_size to a larger number (say 10000):
generate_image_pair_and_labels(X_imgs, labels_dataframe['labels'].tolist(), data_size=100)

[2]increase image size if necessary (say 224):
IMAGE_SIZE = 128

[3]increase GPU numbers, 1 is default
G = 1 #the number of GPU

[4]increase epochs (say 300)
n_epochs = 30

[5]increase batch size if you have large GPU memory (say 50)
batch_size = 10

In [2]:
import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from PIL import Image
import matplotlib.gridspec as gridspec
import numpy as np
import os
import cv2
import pandas as pd
from math import floor, ceil, pi

%matplotlib inline

# Load raw images

In [2]:
def get_image_paths(folder='./data/whale/train_full'):
    """Return the paths of every entry in ``folder``, sorted by file name.

    Args:
        folder: Directory to list. Defaults to the whale training-image
            directory used by this notebook.

    Returns:
        A list of '<folder>/<name>' strings in ascending name order.
    """
    # '/' is used instead of os.path.join so the returned strings keep the
    # exact format the notebook has always produced.
    return ['{}/{}'.format(folder, name) for name in sorted(os.listdir(folder))]

# Collect the path of every training image once; the resize cell below consumes this list.
X_img_paths = get_image_paths()
# Show the first 20 paths as a quick sanity check of the listing.
print(X_img_paths[:20])

['./data/whale/train_full/00022e1a.jpg', './data/whale/train_full/000466c4.jpg', './data/whale/train_full/00087b01.jpg', './data/whale/train_full/001296d5.jpg', './data/whale/train_full/0014cfdf.jpg', './data/whale/train_full/0025e8c2.jpg', './data/whale/train_full/0026a8ab.jpg', './data/whale/train_full/0031c258.jpg', './data/whale/train_full/0035632e.jpg', './data/whale/train_full/0037e7d3.jpg', './data/whale/train_full/00389cd7.jpg', './data/whale/train_full/0042dcc4.jpg', './data/whale/train_full/0042ea34.jpg', './data/whale/train_full/00467ae9.jpg', './data/whale/train_full/004a97f3.jpg', './data/whale/train_full/004c5fb9.jpg', './data/whale/train_full/005c57e7.jpg', './data/whale/train_full/006d0aaf.jpg', './data/whale/train_full/0078af23.jpg', './data/whale/train_full/007c3603.jpg']


# Load whale ids

In [3]:
def load_labels(csv_file='./data/whale/train.csv'):
    """Read the image-to-whale-id table from a CSV file.

    Args:
        csv_file: Path of the CSV to read. Defaults to the training label
            file used by this notebook.

    Returns:
        A pandas DataFrame with the CSV's contents.
    """
    return pd.read_csv(csv_file)

# Load the label table once; later cells read the whale ids from this frame.
data_labels = load_labels()
# Display the first rows to confirm the expected columns are present.
data_labels.head()

Unnamed: 0,Image,Id
0,00022e1a.jpg,w_e15442c
1,000466c4.jpg,w_1287fbc
2,00087b01.jpg,w_da2efe0
3,001296d5.jpg,w_19e5482
4,0014cfdf.jpg,w_f22f3e3


# Id analysis

In [None]:
# Number of rows per whale Id, most frequent first.
data_labels['Id'].value_counts()

# Select ids that occur at least 5 times (and fewer than 100 times)

In [None]:
def get_label_subset_by_threshold(occurence=5, max_occurence=100, labels=None):
    """Count label occurrences and keep those inside a frequency window.

    Args:
        occurence: Minimum number of occurrences (inclusive) a label needs
            to be kept.
        max_occurence: Exclusive upper bound on occurrences. The default of
            100 drops the very frequent "new whale" type.
        labels: pandas Series of labels to count. When omitted, the 'Id'
            column of the notebook-level ``data_labels`` frame is used.

    Returns:
        A pandas Series indexed by label whose values are the occurrence
        counts, restricted to ``occurence <= count < max_occurence``.
    """
    if labels is None:
        labels = data_labels['Id']
    counts = labels.value_counts()
    in_window = (counts >= occurence) & (counts < max_occurence)
    return counts[in_window]

# Ids (with their counts) that pass the frequency filter; used later to decide which images to keep.
label_subset = get_label_subset_by_threshold(occurence=5)

# Data augmentation: to do

In [None]:
def data_augmentation(original_image):
    """Augmentation hook for a single image.

    TODO: no augmentation is implemented yet, so the input is handed back
    untouched.
    """
    return original_image

In [None]:
def is_include_subset(label, label_subset):
    """Tell whether ``label`` is one of the index entries of ``label_subset``."""
    return label in label_subset.index
    
def rgb2gray(rgb):
    """Collapse the colour axis of an image into one grayscale value per pixel.

    Only the first three entries of the last axis are used; they are combined
    as a weighted sum with weights 0.299, 0.587 and 0.114.
    """
    channel_weights = [0.299, 0.587, 0.114]
    colour_channels = rgb[..., :3]
    return np.dot(colour_channels, channel_weights)

# image resize and augmentation

In [None]:
# Side length, in pixels, of the square that every image is resized to.
IMAGE_SIZE = 128

def tf_resize_augment_images(X_img_file_paths):
    """Load, grayscale, resize and augment every image whose id is in the subset.

    Labels are looked up by file name in the notebook-level ``data_labels``
    frame ('Image' -> 'Id'), so the result does not depend on the directory
    listing being in the same order as the CSV rows. Files that have no row
    in ``data_labels`` are skipped.

    Args:
        X_img_file_paths: Iterable of image file paths.

    Returns:
        A tuple ``(X_data, y_data)``: ``X_data`` is a float32 numpy array of
        the processed images (each IMAGE_SIZE x IMAGE_SIZE x 1) and
        ``y_data`` is the list of matching ids, in the same order.
    """
    # File name -> whale id, built once instead of indexing rows by position.
    label_by_image = dict(zip(data_labels['Image'], data_labels['Id']))

    X_data = []
    y_data = []
    tf.reset_default_graph()
    X = tf.placeholder(tf.float32, (None, None, 1))
    tf_img = tf.image.resize_images(X, (IMAGE_SIZE, IMAGE_SIZE), tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    # The graph contains no variables, so no initializer needs to run.
    with tf.Session() as sess:
        # Each image is resized individually as different images may be of different size.
        for file_path in X_img_file_paths:
            label = label_by_image.get(os.path.basename(file_path))
            if label is None or not is_include_subset(label, label_subset):
                continue

            img = mpimg.imread(file_path)
            if len(img.shape) > 2:  # colour image: convert to grayscale
                img = rgb2gray(img)
            # Add the trailing channel axis expected by the placeholder.
            img = img.reshape(img.shape[0], img.shape[1], 1)
            resized_img = sess.run(tf_img, feed_dict={X: img})
            X_data.append(data_augmentation(resized_img))
            y_data.append(label)

    X_data = np.array(X_data, dtype=np.float32)  # Convert to numpy
    return X_data, y_data

# Preprocess every listed image; y_data holds the id of each kept image, in the same order as X_imgs.
X_imgs, y_data = tf_resize_augment_images(X_img_paths)
# Sanity check on the resulting array's shape.
print(X_imgs.shape)

# image normalization

In [None]:
def image_normalize(images):
    """Normalization hook for a batch of images.

    TODO: no normalization is implemented yet, so the input is handed back
    untouched.
    """
    return images

# save processed images and labels

In [None]:
# Persist the processed image array so the preprocessing cells need not be re-run.
np.save('./data/whale/save/resize.npy', X_imgs)
# Store the matching ids in a single 'labels' column, without the index column.
labels_dataframe = pd.DataFrame({'labels':y_data})
labels_dataframe.to_csv('./data/whale/save/labels.csv', index=False)

# load data

In [3]:
# Reload the processed images and their ids from the files written by the save cell.
X_imgs = np.load('./data/whale/save/resize.npy')
labels_dataframe = pd.read_csv('./data/whale/save/labels.csv')
# Display the first rows as a sanity check.
labels_dataframe.head()

Unnamed: 0,labels
0,w_1287fbc
1,w_da2efe0
2,w_3d0bc7a
3,w_fd1cb9d
4,w_ab6db0f


# generate image pairs and labels(true or false)

In [4]:
import tensorflow as tf
import numpy as np

In [5]:
def generate_image_pair_and_labels(images, labels, data_size=500):
    """Randomly sample a shuffled, balanced set of image pairs.

    Half of the pairs (rounded; for odd ``data_size`` the extra pair goes to
    a randomly chosen side) are two different images sharing a label and are
    tagged ``[1]``; the rest are images with different labels, tagged ``[0]``.

    Args:
        images: Indexable collection of images, aligned with ``labels``.
        labels: Sequence of hashable labels, one per image.
        data_size: Total number of pairs to generate.

    Returns:
        A tuple ``(X, y)`` of numpy arrays: ``X[k]`` stacks the two images of
        pair ``k`` and ``y[k]`` is ``[1]`` or ``[0]``.

    Raises:
        ValueError: If ``data_size`` is negative, or if the requested kind of
            pair cannot exist (no label occurs twice, or fewer than two
            distinct labels). Without this check the sampling loops below
            would never terminate.
    """
    if data_size < 0:
        raise ValueError("data_size must be non-negative, got {}".format(data_size))

    n_same = data_size // 2
    n_diff = data_size - n_same
    # For odd sizes, randomly decide which kind of pair gets the extra slot.
    if n_same < n_diff and np.random.rand() > 0.5:
        n_same, n_diff = n_diff, n_same

    n_items = len(labels)
    n_distinct = len(set(labels))
    if n_same > 0 and n_distinct == n_items:
        raise ValueError("cannot build same-label pairs: no label occurs more than once")
    if n_diff > 0 and n_distinct < 2:
        raise ValueError("cannot build different-label pairs: fewer than two distinct labels")

    X, y = [], []

    # Rejection-sample pairs of distinct images that share a label.
    found = 0
    while found < n_same:
        idx1, idx2 = np.random.randint(0, n_items, 2)
        if idx1 != idx2 and labels[idx1] == labels[idx2]:
            X.append(np.array([images[idx1], images[idx2]]))
            y.append([1])
            found += 1

    # Rejection-sample pairs whose labels differ.
    found = 0
    while found < n_diff:
        idx1, idx2 = np.random.randint(0, n_items, 2)
        if labels[idx1] != labels[idx2]:
            X.append(np.array([images[idx1], images[idx2]]))
            y.append([0])
            found += 1

    # Interleave positive and negative pairs.
    shuffled_idx = np.random.permutation(data_size)
    return np.array(X)[shuffled_idx], np.array(y)[shuffled_idx]

# Select 100 pairs to train the siamese network, just for a quick test.
image_pairs, y_labels = generate_image_pair_and_labels(X_imgs, labels_dataframe['labels'].tolist(), data_size=100)

# split data

In [6]:
test_ratio = 0.3
# Index of the first test sample: everything before it is used for training.
split_at = int((1 - test_ratio) * len(y_labels))
image_pair_train, image_pair_test = image_pairs[:split_at], image_pairs[split_at:]
y_labels_train, y_labels_test = y_labels[:split_at], y_labels[split_at:]

In [70]:
from keras.layers import Input, Conv2D, Lambda, subtract, merge, Dense, Flatten, MaxPooling2D, BatchNormalization, LeakyReLU, Activation, add
from keras.models import Model, Sequential
from keras.regularizers import l2
from keras import backend as K
from keras.optimizers import SGD,Adam
from keras.losses import binary_crossentropy
from keras.utils.training_utils import multi_gpu_model

# Clear the Keras session and reset TensorFlow's default graph before the model is built.
K.clear_session()
tf.reset_default_graph()

# define residual network block

In [71]:
def resNet_block(image1, image2, channel, strides):
    """Apply one residual block to both branches of the siamese network.

    The convolution layers are created once and called on both inputs, so the
    two branches use the same convolution weights; each branch gets its own
    BatchNormalization layers.

    Args:
        image1: Feature tensor of the first branch.
        image2: Feature tensor of the second branch.
        channel: Number of filters of every convolution in the block.
        strides: Stride (used for both spatial axes) of the first convolution
            and, when greater than 1, of the shortcut convolution.

    Returns:
        A tuple with the block output for the first and second branch.
    """
    conv1 = Conv2D(channel, kernel_size=(3, 3), strides=(strides, strides), kernel_initializer="he_normal", padding='same')
    # Batch normalization parameters are NOT shared: one layer per branch.
    bn1 = BatchNormalization()
    bn2 = BatchNormalization()
    rl1 = Activation('relu') 
    conv2 = Conv2D(channel, kernel_size=(3, 3), strides=(1, 1), kernel_initializer="he_normal", padding='same')

    # Main path: conv1 and conv2 (weights and bias) are reused for both branches.
    net1 = conv2(rl1(bn1(conv1(image1))))
    net2 = conv2(rl1(bn2(conv1(image2))))
    
    # Shortcut path: only when the block is strided, the inputs go through a
    # strided convolution (shared by both branches) before being added to the
    # main path; otherwise the inputs are added as they are.
    if strides > 1:
        conv3 = Conv2D(channel, kernel_size=(3, 3), strides=(strides, strides), kernel_initializer="he_normal", padding='same')
        image1 = conv3(image1)
        image2 = conv3(image2)
        
    # Residual connection followed by per-branch batch norm and a ReLU.
    net1 = add([image1, net1])
    net2 = add([image2, net2])
    bn3 = BatchNormalization()
    bn4 = BatchNormalization()
    rl2 = Activation('relu')
    
    return rl2(bn3(net1)), rl2(bn4(net2))



# define residual network

In [72]:
def residual_network(image1, image2):
    """Encode both images of a pair into flat embeddings with a residual network.

    A 7x7 strided convolution stem and max pooling are followed by nine
    residual blocks (three each with 32, 64 and 128 filters; the first block
    of the 64- and 128-filter groups uses stride 2), a final max pooling and
    a flatten. Convolution weights are shared between the two inputs, while
    batch normalization layers are created per input.

    Args:
        image1: Tensor holding the first image of each pair.
        image2: Tensor holding the second image of each pair.

    Returns:
        A tuple with the flattened embedding of image1 and of image2.
    """
    # Stem: the same convolution is applied to both inputs; batch norm is per input.
    conv1 = Conv2D(32, kernel_size=(7, 7), strides=(2, 2), kernel_initializer="he_normal", padding='same')
    bn1 = BatchNormalization()
    bn2 = BatchNormalization()
    rl1 = Activation('relu')  
    mp1 = MaxPooling2D()
    
    net1, net2 = mp1(rl1(bn1(conv1(image1)))), mp1(rl1(bn2(conv1(image2))))
    
    # Three groups of three residual blocks; a stride of 2 opens the 64- and 128-filter groups.
    net1, net2 = resNet_block(net1, net2, 32, 1)
    net1, net2 = resNet_block(net1, net2, 32, 1)
    net1, net2 = resNet_block(net1, net2, 32, 1)
    net1, net2 = resNet_block(net1, net2, 64, 2)
    net1, net2 = resNet_block(net1, net2, 64, 1)
    net1, net2 = resNet_block(net1, net2, 64, 1)
    net1, net2 = resNet_block(net1, net2, 128, 2)
    net1, net2 = resNet_block(net1, net2, 128, 1)
    net1, net2 = resNet_block(net1, net2, 128, 1)
    
    # Head: pool once more and flatten each branch into a vector.
    mp2 = MaxPooling2D()
    flatten1 = Flatten()
    
    return flatten1(mp2(net1)), flatten1(mp2(net2))

# define siamese network

In [73]:

# Step size handed to the Adam optimizer when the model is compiled.
learning_rate = 0.00001
# One sample is a pair (axis 0 of size 2) of IMAGE_SIZE x IMAGE_SIZE single-channel images.
input_shape = (2, IMAGE_SIZE, IMAGE_SIZE, 1)
inputs = Input(input_shape)
G = 1 #the number of GPU

In [74]:
def siamese_network(image_pairs):
    """Build and compile the siamese classifier on a tensor of image pairs.

    The two images of each pair are encoded by ``residual_network``; the
    element-wise absolute difference of the two embeddings is fed to a single
    sigmoid unit that predicts whether the pair shows the same whale.

    Reads the notebook-level settings ``G`` (number of GPUs) and
    ``learning_rate``.

    Args:
        image_pairs: Keras input tensor whose axis 1 holds the two images of
            a pair.

    Returns:
        The compiled Keras model (wrapped by ``multi_gpu_model`` when G > 1).
    """
    # Raw TensorFlow ops (e.g. tf.unstack) cannot be applied to Keras tensors
    # directly, so the pair axis is split with Lambda layers instead.
    image1 = Lambda(lambda x: x[:, 0, :, :])(image_pairs)
    image2 = Lambda(lambda x: x[:, 1, :, :])(image_pairs)

    embedding1, embedding2 = residual_network(image1, image2)

    # Merge the two embeddings with their element-wise L1 distance. A Lambda
    # layer is used in place of the deprecated Keras 1 `merge(mode=callable)`.
    both = Lambda(lambda pair: K.abs(pair[0] - pair[1]))([embedding1, embedding2])
    print(type(both))

    prediction = Dense(1, activation='sigmoid', kernel_initializer='he_normal')(both)

    # check to see if we are compiling using just a single GPU
    if G <= 1:
        print("[INFO] training with 1 GPU...")
        siamese_net = Model(inputs=image_pairs, outputs=prediction)
    # otherwise, we are compiling using multiple GPUs
    else:
        print("[INFO] training with {} GPUs...".format(G))

        # we'll store a copy of the model on *every* GPU and then combine
        # the results from the gradient updates on the CPU
        with tf.device("/cpu:0"):
            # initialize the model
            siamese_net = Model(inputs=image_pairs, outputs=prediction)

        # make the model parallel
        siamese_net = multi_gpu_model(siamese_net, gpus=G)

    optimizer = Adam(learning_rate)
    siamese_net.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=['accuracy'])

    return siamese_net

# Build and compile the model on the pair-shaped input tensor defined above.
siamese_net = siamese_network(inputs)

<class 'tensorflow.python.framework.ops.Tensor'>
[INFO] training with 1 GPU...


  from ipykernel import kernelapp as app


# start training

In [75]:
n_epochs = 30
batch_size = 10
# Always use at least one batch: np.array_split rejects 0 sections, which
# would happen whenever the training set is smaller than batch_size.
n_batches = max(1, len(y_labels_train) // batch_size)

for epoch in range(n_epochs):
    # Visit the training pairs in a fresh random order every epoch.
    idx = np.random.permutation(len(y_labels_train))
    X_batches = np.array_split(image_pair_train[idx], n_batches)
    y_batches = np.array_split(y_labels_train[idx], n_batches)

    for X_batch, y_batch in zip(X_batches, y_batches):
        loss_train, acc_train = siamese_net.train_on_batch(X_batch, y_batch)

    # Evaluate with the same batch size used for training instead of a hard-coded value.
    loss_test, acc_test = siamese_net.evaluate(x=image_pair_test, y=y_labels_test, batch_size=batch_size, verbose=0)

    # NOTE: loss_train / acc_train are the metrics of the LAST training batch
    # of the epoch only, not an average over the epoch.
    print("epoch:{}\tloss_train:{:.3f}\tloss_test:{:.3f}\tacc_train:{:.2f}%\tacc_test:{:.2f}%".format(
        epoch, loss_train, loss_test, acc_train*100, acc_test*100))


epoch:0	loss_train:0.855	loss_test:0.814	acc_train:60.00%	acc_test:43.33%
epoch:1	loss_train:0.714	loss_test:0.758	acc_train:50.00%	acc_test:46.67%
epoch:2	loss_train:0.572	loss_test:0.709	acc_train:50.00%	acc_test:50.00%
epoch:3	loss_train:0.330	loss_test:0.675	acc_train:90.00%	acc_test:56.67%
epoch:4	loss_train:0.668	loss_test:0.651	acc_train:50.00%	acc_test:60.00%
epoch:5	loss_train:0.322	loss_test:0.633	acc_train:100.00%	acc_test:60.00%
epoch:6	loss_train:0.512	loss_test:0.622	acc_train:70.00%	acc_test:60.00%
epoch:7	loss_train:0.403	loss_test:0.619	acc_train:90.00%	acc_test:63.33%
epoch:8	loss_train:0.406	loss_test:0.616	acc_train:90.00%	acc_test:63.33%
epoch:9	loss_train:0.395	loss_test:0.611	acc_train:90.00%	acc_test:63.33%
epoch:10	loss_train:0.543	loss_test:0.610	acc_train:60.00%	acc_test:66.67%
epoch:11	loss_train:0.388	loss_test:0.609	acc_train:80.00%	acc_test:66.67%
epoch:12	loss_train:0.311	loss_test:0.607	acc_train:100.00%	acc_test:66.67%
epoch:13	loss_train:0.366	loss_te