In [1]:
import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from PIL import Image
import matplotlib.gridspec as gridspec
import numpy as np
import os
import cv2
import pandas as pd
from math import floor, ceil, pi

%matplotlib inline

# Load raw images

In [2]:
def get_image_paths():
    folder = './data/whale/train_full'
    files = os.listdir(folder)
    files.sort()
    files = ['{}/{}'.format(folder, file) for file in files]
    return files

X_img_paths = get_image_paths()
print(X_img_paths[:20])

['./data/whale/train_full/00022e1a.jpg', './data/whale/train_full/000466c4.jpg', './data/whale/train_full/00087b01.jpg', './data/whale/train_full/001296d5.jpg', './data/whale/train_full/0014cfdf.jpg', './data/whale/train_full/0025e8c2.jpg', './data/whale/train_full/0026a8ab.jpg', './data/whale/train_full/0031c258.jpg', './data/whale/train_full/0035632e.jpg', './data/whale/train_full/0037e7d3.jpg', './data/whale/train_full/00389cd7.jpg', './data/whale/train_full/0042dcc4.jpg', './data/whale/train_full/0042ea34.jpg', './data/whale/train_full/00467ae9.jpg', './data/whale/train_full/004a97f3.jpg', './data/whale/train_full/004c5fb9.jpg', './data/whale/train_full/005c57e7.jpg', './data/whale/train_full/006d0aaf.jpg', './data/whale/train_full/0078af23.jpg', './data/whale/train_full/007c3603.jpg']


# Load whale ids

In [3]:
def load_labels():
    csv_file = './data/whale/train.csv'
    data_labels = pd.read_csv(csv_file)
    return data_labels

data_labels = load_labels()
data_labels.head()

Unnamed: 0,Image,Id
0,00022e1a.jpg,w_e15442c
1,000466c4.jpg,w_1287fbc
2,00087b01.jpg,w_da2efe0
3,001296d5.jpg,w_19e5482
4,0014cfdf.jpg,w_f22f3e3


# Id analysis

In [4]:
data_labels['Id'].value_counts()

new_whale    810
w_1287fbc     34
w_98baff9     27
w_7554f44     26
w_1eafe46     23
w_693c9ee     22
w_fd1cb9d     22
w_ab4cae2     22
w_43be268     21
w_73d5489     21
w_987a36f     21
w_f19faeb     20
w_9b401eb     19
w_95874a5     19
w_c0d494d     18
w_b7d5069     18
w_dbda0d6     17
w_0e737d0     17
w_eb0a6ed     17
w_18eee6e     17
w_17ee910     16
w_b0e05b1     16
w_6c803bf     16
w_67de30b     16
w_a59905f     16
w_9ca943b     15
w_89e159a     15
w_ee17a08     15
w_cae7677     15
w_b074cdf     14
            ... 
w_ed67618      1
w_ddb4c8d      1
w_e3b3ade      1
w_771136b      1
w_4e85c68      1
w_9dcf002      1
w_874cf52      1
w_9845f16      1
w_d7ffaf2      1
w_ebb16ab      1
w_dfd3f5e      1
w_985877e      1
w_8e93d0e      1
w_8bcf29b      1
w_a69bb2b      1
w_471ae98      1
w_f4f3f6d      1
w_959b917      1
w_0b0d88d      1
w_d4251cb      1
w_f2d87b0      1
w_a91600a      1
w_ee23a5f      1
w_45aac7a      1
w_b856fc1      1
w_4e505cc      1
w_9874f0d      1
w_5102893     

# select ids occur more than 4 times

In [5]:
def get_label_subset_by_threshold(occurence=5):
    value_statics = data_labels['Id'].value_counts()
    value_cut = value_statics[value_statics >= occurence]
    value_cut = value_cut[value_cut < 100] #remove the "new whale" type
    return value_cut  #return type: pandas series

label_subset = get_label_subset_by_threshold(occurence=5)

# Data augmentation: to do

In [6]:
def data_augmentation(original_image):
    #to do
    
    return original_image

In [7]:
def is_include_subset(label, label_subset):
    if label in label_subset.index:
        return True
    else:
        return False
    
def rgb2gray(rgb):
    return np.dot(rgb[...,:3], [0.299, 0.587, 0.114])

# image resize and augmentation

In [8]:
IMAGE_SIZE = 224

def tf_resize_augment_images(X_img_file_paths):
    X_data = []
    y_data = []
    tf.reset_default_graph()
    X = tf.placeholder(tf.float32, (None, None, 1))
    tf_img = tf.image.resize_images(X, (IMAGE_SIZE, IMAGE_SIZE), tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        
        # Each image is resized individually as different image may be of different size.
        for index, file_path in enumerate(X_img_file_paths):
            label = data_labels.iloc[index]['Id']
            
            if is_include_subset(label, label_subset):
                img = mpimg.imread(file_path)
                if len(img.shape) > 2:# convert to grayscale
                    img = rgb2gray(img)
                img = img.reshape(img.shape[0], img.shape[1], 1)
                resized_img = sess.run(tf_img, feed_dict = {X: img})
                X_data.append(data_augmentation(resized_img))
                y_data.append(label)

    X_data = np.array(X_data, dtype = np.float32) # Convert to numpy
    return X_data, y_data

X_imgs, y_data = tf_resize_augment_images(X_img_paths)
print(X_imgs.shape)

(2508, 224, 224, 1)


# save processed images and labels

In [None]:
np.save('./data/whale/save/resize.npy', X_imgs)
labels_dataframe = pd.DataFrame({'labels':y_data})
labels_dataframe.to_csv('./data/whale/save/labels.csv', index=False)

# load data

In [9]:
X_imgs = np.load('./data/whale/save/resize.npy')
labels_dataframe = pd.read_csv('./data/whale/save/labels.csv')
labels_dataframe.head()

Unnamed: 0,labels
0,w_1287fbc
1,w_da2efe0
2,w_3d0bc7a
3,w_fd1cb9d
4,w_ab6db0f


In [10]:
import tensorflow as tf
import numpy as np

# generate image pairs and labels(true or false)

In [11]:
def generate_image_pair_and_labels(images, labels, data_size=500):
    size1 = data_size // 2 
    size2 = data_size - size1
    if size1 < size2 and np.random.rand() > 0.5:
        size1, size2 = size2, size1
    
    X, y = [], []
    k = 0
    while k < size1:
        idx1, idx2 = np.random.randint(0, len(labels), 2)
        if idx1 != idx2 and labels[idx1] == labels[idx2]:
            X.append(np.array([images[idx1], images[idx2]]))
            y.append([1])
            k += 1
    k = 0
    while k < size2:
        idx1, idx2 = np.random.randint(0, len(labels), 2)
        if labels[idx1] != labels[idx2]:
            X.append(np.array([images[idx1], images[idx2]]))
            y.append([0])
            k += 1
            
    shuffled_idx = np.random.permutation(data_size)
    return np.array(X)[shuffled_idx], np.array(y)[shuffled_idx]

#select 100 pairs to tain siamese network, just for quick test
image_pairs, y_labels = generate_image_pair_and_labels(X_imgs, labels_dataframe['labels'].tolist(), data_size=100)

# split data

In [12]:
test_ratio = 0.3
image_pair_train = image_pairs[:int((1 - test_ratio) * len(y_labels))]
y_labels_train = y_labels[:int((1 - test_ratio) * len(y_labels))]
image_pair_test = image_pairs[int((1 - test_ratio) * len(y_labels)):]
y_labels_test = y_labels[int((1 - test_ratio) * len(y_labels)):]

# Rerun the model from here

In [13]:
tf.reset_default_graph()
he_init = tf.contrib.layers.variance_scaling_initializer()

  from ._conv import register_converters as _register_converters


# define residual network block

In [14]:
def resNet_block(inputs, channel, strides, activation=tf.nn.relu, reuse=False, name=None):
    net = tf.layers.conv2d(inputs, channel, 3, strides=strides, kernel_initializer=he_init, activation=activation, 
                           padding="same", reuse=reuse, name=name+'_conv1')
    net = tf.layers.conv2d(net, channel, 3, strides=1, kernel_initializer=he_init, padding="same", reuse=reuse, name=name+'_conv2')

    if strides > 1:
        inputs = tf.layers.conv2d(inputs, channel, 3, strides=strides, kernel_initializer=he_init, padding="same", 
                                  reuse=reuse, name=name+'_conv3')
    net = tf.add_n([inputs, net])
    net = activation(net)
    return net


# define residual network 

In [15]:
def residual_network(inputs, activation=tf.nn.relu, reuse=False, name=None):
    net = tf.layers.conv2d(inputs, 32, 7, strides=2, kernel_initializer=he_init, activation=activation, 
                           padding="same", reuse=reuse, name=name+'_conv1')
    net = tf.layers.max_pooling2d(net, 2, 2)
    
    net = resNet_block(net, 32, 1, activation=activation, reuse=reuse, name=name+'_block1')
    net = resNet_block(net, 32, 1, activation=activation, reuse=reuse, name=name+'_block2')
    net = resNet_block(net, 32, 1, activation=activation, reuse=reuse, name=name+'_block3')
    net = resNet_block(net, 64, 2, activation=activation, reuse=reuse, name=name+'_block4')
    net = resNet_block(net, 64, 1, activation=activation, reuse=reuse, name=name+'_block5')
    net = resNet_block(net, 64, 1, activation=activation, reuse=reuse, name=name+'_block6')
    net = resNet_block(net, 128, 2, activation=activation, reuse=reuse, name=name+'_block7')
    net = resNet_block(net, 128, 1, activation=activation, reuse=reuse, name=name+'_block8')
    net = resNet_block(net, 128, 1, activation=activation, reuse=reuse, name=name+'_block9')
    
    net = tf.contrib.layers.flatten(net)
    
    return net

# define siamese netwrok

In [16]:
def siamese_network(image1, image2):
    embedding1 = residual_network(image1, activation=tf.nn.relu, reuse=False, name='resNet1')
    embedding2 = residual_network(image2, activation=tf.nn.relu, reuse=True, name='resNet1')
    net = tf.concat([embedding1, embedding2], axis=1)
    net = tf.layers.dense(net, units=20, activation=tf.nn.elu, kernel_initializer=he_init)
    logits = tf.layers.dense(net, units=1, kernel_initializer=he_init, name="outputs")
    y_prob = tf.nn.sigmoid(logits)
    
    return logits, y_prob

# define train module

In [17]:
def train(logits, y_prob, y_labels):
    loss = tf.losses.log_loss(y_labels, y_prob)
    optimizer = tf.train.AdamOptimizer(learning_rate=0.00001)
    train_op = optimizer.minimize(loss)
    y_pred = tf.cast(tf.greater_equal(logits, 0), tf.int32)
    y_pred_correct = tf.equal(y_pred, y)
    accuracy = tf.reduce_mean(tf.cast(y_pred_correct, tf.float32))
    loss_summary = tf.summary.scalar("loss", loss)
    accuracy_summary = tf.summary.scalar("accuracy", accuracy)
    summary_op = tf.summary.merge_all()
    
    return loss, train_op, accuracy, summary_op

# starting training

In [18]:
height = 224
width = 224
channel = 1


X = tf.placeholder(tf.float32, shape=(None, 2, height, width, channel), name="X")
X1, X2 = tf.unstack(X, axis=1)
y = tf.placeholder(tf.int32, shape=(None), name="y")

logits, y_prob = siamese_network(X1, X2)
loss, train_op, accuracy, summary_op = train(logits, y_prob, y)
init = tf.global_variables_initializer()
# print([x.name for x in tf.global_variables()])

In [22]:
n_epochs = 30
batch_size = 5

n_batches = len(y_labels_train) // batch_size

with tf.Session() as sess:
    sess.run(init)
        
    for epoch in range(n_epochs):    
        idx = np.random.permutation(len(y_labels_train))
        X_batches = np.array_split(image_pair_train[idx], n_batches)
        y_batches = np.array_split(y_labels_train[idx], n_batches)
#         print X_batches[0].shape
        
        for X_batch, y_batch in zip(X_batches, y_batches):
            sess.run(train_op, feed_dict={X: X_batch, y: y_batch})
        loss_train, accuracy_train = sess.run([loss, accuracy], feed_dict={X: X_batch, y: y_batch})
        loss_test, accuracy_test = sess.run([loss, accuracy], feed_dict={X: image_pair_test, y: y_labels_test})
        
        print "{}\tloss_train:{:.3f}\tloss_test:{:.3f}\tacc_train:{:.2f}%\tacc_test:{:.2f}%".format(
            epoch, loss_train, loss_test, accuracy_train*100, accuracy_test*100)

0	loss_train:6.447	loss_test:6.985	acc_train:60.00%	acc_test:56.67%
1	loss_train:9.671	loss_test:6.985	acc_train:40.00%	acc_test:56.67%
2	loss_train:12.894	loss_test:6.985	acc_train:20.00%	acc_test:56.67%
3	loss_train:9.671	loss_test:6.985	acc_train:40.00%	acc_test:56.67%
4	loss_train:9.671	loss_test:6.985	acc_train:40.00%	acc_test:56.67%
5	loss_train:16.118	loss_test:6.985	acc_train:0.00%	acc_test:56.67%
6	loss_train:6.447	loss_test:6.985	acc_train:60.00%	acc_test:56.67%
7	loss_train:12.894	loss_test:6.985	acc_train:20.00%	acc_test:56.67%
8	loss_train:9.671	loss_test:6.985	acc_train:40.00%	acc_test:56.67%
9	loss_train:3.224	loss_test:6.985	acc_train:80.00%	acc_test:56.67%
10	loss_train:9.671	loss_test:6.985	acc_train:40.00%	acc_test:56.67%
11	loss_train:9.671	loss_test:6.985	acc_train:40.00%	acc_test:56.67%
12	loss_train:6.447	loss_test:6.985	acc_train:60.00%	acc_test:56.67%
13	loss_train:6.447	loss_test:6.985	acc_train:60.00%	acc_test:56.67%
14	loss_train:12.894	loss_test:6.985	acc_t